1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38 
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44 
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54 
55 #define TC_SENTINEL 0x5ca1ab1e
56 
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64 
65 #ifdef TC_TRACE
66 #  define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 #  define TC_TRACE_SCOPE(call_id)
69 #endif
70 
71 enum tc_call_id {
72 #define CALL(name) TC_CALL_##name,
73 #include "u_threaded_context_calls.h"
74 #undef CALL
75    TC_NUM_CALLS,
76    TC_END_BATCH = TC_NUM_CALLS,
77 };
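/* Illustrative note: the CALL() X-macro expands every entry of
 * u_threaded_context_calls.h twice, once into tc_call_names[] (debug/trace
 * builds) and once into this enum, so an entry such as CALL(set_blend_color)
 * yields both the string "set_blend_color" and the value
 * TC_CALL_set_blend_color, keeping the two tables in sync automatically.
 */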
78 
79 static void
80 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index);
81 
82 static void
83 tc_buffer_subdata(struct pipe_context *_pipe,
84                   struct pipe_resource *resource,
85                   unsigned usage, unsigned offset,
86                   unsigned size, const void *data);
87 
88 static void
89 tc_batch_check(UNUSED struct tc_batch *batch)
90 {
91    tc_assert(batch->sentinel == TC_SENTINEL);
92    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
93 }
94 
95 static void
96 tc_debug_check(struct threaded_context *tc)
97 {
98    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
99       tc_batch_check(&tc->batch_slots[i]);
100       tc_assert(tc->batch_slots[i].tc == tc);
101    }
102 }
103 
104 static void
105 tc_set_driver_thread(struct threaded_context *tc)
106 {
107 #ifndef NDEBUG
108    tc->driver_thread = thrd_current();
109 #endif
110 }
111 
112 static void
113 tc_clear_driver_thread(struct threaded_context *tc)
114 {
115 #ifndef NDEBUG
116    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
117 #endif
118 }
119 
120 struct tc_batch_rp_info {
121    /* this is what drivers can see */
122    struct tc_renderpass_info info;
123    /* determines whether the info can be "safely" read by drivers or if it may still be in use */
124    struct util_queue_fence ready;
126    /* when a batch is full, the rp info rolls over onto 'next' */
126    struct tc_batch_rp_info *next;
127    /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
128    struct tc_batch_rp_info *prev;
129 };
130 
131 static struct tc_batch_rp_info *
132 tc_batch_rp_info(struct tc_renderpass_info *info)
133 {
134    return (struct tc_batch_rp_info *)info;
135 }
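/* Note: this cast is only valid because 'info' is the first member of
 * struct tc_batch_rp_info, so the tc_renderpass_info pointer handed to
 * drivers converts back to its enclosing wrapper without any offset
 * adjustment (effectively container_of with offset 0).
 */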
136 
137 static void
138 tc_sanitize_renderpass_info(struct threaded_context *tc)
139 {
140    tc->renderpass_info_recording->cbuf_invalidate = 0;
141    tc->renderpass_info_recording->zsbuf_invalidate = false;
142    tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
143    if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
144       /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
145       * driver can always detect invalidation
146       */
147       tc->renderpass_info_recording->zsbuf_clear_partial = true;
148    if (tc->num_queries_active)
149       tc->renderpass_info_recording->has_query_ends = true;
150 }
151 
152 /* ensure the batch's array of renderpass data is large enough for the current index */
153 static void
154 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
155 {
156    unsigned size = batch->renderpass_infos.capacity;
157    unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
158 
159    if (size / sizeof(struct tc_batch_rp_info) > cur_num)
160       return;
161 
162    struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
163    unsigned old_idx = batch->renderpass_info_idx - 1;
164    bool redo = tc->renderpass_info_recording &&
165                tc->renderpass_info_recording == &infos[old_idx].info;
166    if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
167       mesa_loge("tc: memory alloc fail!");
168 
169    if (size != batch->renderpass_infos.capacity) {
170       /* zero new allocation region */
171       uint8_t *data = batch->renderpass_infos.data;
172       memset(data + size, 0, batch->renderpass_infos.capacity - size);
173       unsigned start = size / sizeof(struct tc_batch_rp_info);
174       unsigned count = (batch->renderpass_infos.capacity - size) /
175                        sizeof(struct tc_batch_rp_info);
176       infos = batch->renderpass_infos.data;
177       if (infos->prev)
178          infos->prev->next = infos;
179       for (unsigned i = 0; i < count; i++)
180          util_queue_fence_init(&infos[start + i].ready);
181       /* re-set current recording info on resize */
182       if (redo)
183          tc->renderpass_info_recording = &infos[old_idx].info;
184    }
185 }
186 
187 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
188 static void
189 tc_signal_renderpass_info_ready(struct threaded_context *tc)
190 {
191    if (tc->renderpass_info_recording &&
192        !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
193       util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
194 }
195 
196 /* increment the current renderpass info struct for recording
197  * 'full_copy' is used for preserving data across non-blocking tc batch flushes
198  */
199 static void
200 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
201 {
202    struct tc_batch *batch = &tc->batch_slots[batch_idx];
203    struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
204 
205    if (tc_info[0].next || batch->num_total_slots) {
206       /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
207        * (probably a CTS case)
208        */
209       struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
210       if (!util_queue_fence_is_signalled(&info->ready)) {
211          /* this batch is actively executing and the driver is waiting on the recording fence to signal */
212          /* force all buffer usage to avoid data loss */
213          info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
214          info->info.zsbuf_clear_partial = true;
215          info->info.has_query_ends = tc->num_queries_active > 0;
216          /* ensure threaded_context_get_renderpass_info() won't deadlock */
217          info->next = NULL;
218          util_queue_fence_signal(&info->ready);
219       }
220       /* always wait on the batch to finish since this will otherwise overwrite thread data */
221       util_queue_fence_wait(&batch->fence);
222    }
223    /* increment rp info and initialize it */
224    batch->renderpass_info_idx++;
225    tc_batch_renderpass_infos_resize(tc, batch);
226    tc_info = batch->renderpass_infos.data;
227 
228    if (full_copy) {
229       /* this should only be called when changing batches */
230       assert(batch->renderpass_info_idx == 0);
231       /* copy the previous data in its entirety: this is still the same renderpass */
232       if (tc->renderpass_info_recording) {
233          tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
234          tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
235          tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
236          /* guard against deadlock scenario */
237          assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
238       } else {
239          tc_info[batch->renderpass_info_idx].info.data = 0;
240          tc_info[batch->renderpass_info_idx].prev = NULL;
241       }
242    } else {
243       /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
244       tc_info[batch->renderpass_info_idx].info.data = 0;
245       if (tc->renderpass_info_recording) {
246          tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
247          tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
248          tc_info[batch->renderpass_info_idx].prev = NULL;
249       }
250    }
251 
252    assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
253    /* signal existing info since it will not be used anymore */
254    tc_signal_renderpass_info_ready(tc);
255    util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
256    /* guard against deadlock scenario */
257    assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
258    /* this is now the current recording renderpass info */
259    tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
260    batch->max_renderpass_info_idx = batch->renderpass_info_idx;
261 }
262 
263 static ALWAYS_INLINE struct tc_renderpass_info *
264 tc_get_renderpass_info(struct threaded_context *tc)
265 {
266    return tc->renderpass_info_recording;
267 }
268 
269 /* update metadata at draw time */
270 static void
271 tc_parse_draw(struct threaded_context *tc)
272 {
273    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
274 
275    if (info) {
276       /* all buffers that aren't cleared are considered loaded */
277       info->cbuf_load |= ~info->cbuf_clear;
278       if (!info->zsbuf_clear)
279          info->zsbuf_load = true;
280       /* previous invalidates are no longer relevant */
281       info->cbuf_invalidate = 0;
282       info->zsbuf_invalidate = false;
283       info->has_draw = true;
284       info->has_query_ends |= tc->query_ended;
285    }
286 
287    tc->in_renderpass = true;
288    tc->seen_fb_state = true;
289    tc->query_ended = false;
290 }
291 
292 static void *
293 to_call_check(void *ptr, unsigned num_slots)
294 {
295 #if TC_DEBUG >= 1
296    struct tc_call_base *call = ptr;
297    tc_assert(call->num_slots == num_slots);
298 #endif
299    return ptr;
300 }
301 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
302 
303 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
304 #define call_size(type)          size_to_slots(sizeof(struct type))
305 #define call_size_with_slots(type, num_slots) size_to_slots( \
306    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
307 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
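/* Sizing sketch (illustrative numbers): each slot is one uint64_t, so a call
 * struct of, say, 24 bytes occupies size_to_slots(24) = DIV_ROUND_UP(24, 8) =
 * 3 slots, and a call with N trailing pointer-sized elements occupies
 * call_size_with_slots(type, N) slots. Every call header therefore starts
 * 8-byte aligned within the batch's slot array.
 */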
308 
309 ALWAYS_INLINE static void
310 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
311 {
312    /* ignore batch usage when persistent */
313    if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
314       threaded_resource(pres)->last_batch_usage = tc->next;
315    threaded_resource(pres)->batch_generation = tc->batch_generation;
316 }
317 
318 ALWAYS_INLINE static void
319 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
320 {
321    if (!pres)
322       return;
323    /* mark with special value to block any unsynchronized access */
324    threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
325    threaded_resource(pres)->batch_generation = tc->batch_generation;
326 }
327 
328 /* this can ONLY be used to check against the currently recording batch */
329 ALWAYS_INLINE static bool
330 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
331 {
332    const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
333 
334    if (!tc->options.unsynchronized_texture_subdata)
335       return true;
336 
337    /* resource has persistent access: assume always busy */
338    if (tbuf->last_batch_usage == INT8_MAX)
339       return true;
340 
341    /* resource has never been seen */
342    if (tbuf->last_batch_usage == -1)
343       return false;
344 
345    /* resource has been seen but no batches have executed */
346    if (tc->last_completed == -1)
347       return true;
348 
349    /* begin comparisons checking number of times batches have cycled */
350    unsigned diff = tc->batch_generation - tbuf->batch_generation;
351    /* resource has been seen, batches have fully cycled at least once */
352    if (diff > 1)
353       return false;
354 
355    /* resource has been seen in current batch cycle: return whether batch has definitely completed */
356    if (diff == 0)
357       return tc->last_completed >= tbuf->last_batch_usage;
358 
359    /* resource has been seen within one batch cycle: check for batch wrapping */
360    if (tc->last_completed >= tbuf->last_batch_usage)
361       /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
362       return false;
363 
364    /* batch execution has not definitely wrapped: resource is definitely not idle */
365    if (tc->last_completed > tc->next)
366       return true;
367 
368    /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
369    if (tbuf->last_batch_usage > tc->last_completed)
370       return false;
371 
372    /* tc->last_completed is not an exact measurement, so anything else is considered busy */
373    return true;
374 }
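/* Worked example (assumed values, for illustration only): batches cycle
 * through TC_MAX_BATCHES slots per generation, so a resource last seen with
 * batch_generation == 5 is reported idle as soon as tc->batch_generation
 * reaches 7 (diff > 1), because every batch of generation 5 must have
 * executed by then. For smaller gaps the last_completed comparisons above
 * decide, and any remaining ambiguity is reported as busy.
 */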
375 
376 /* Assign src to dst while dst is uninitialized. */
377 static inline void
378 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
379 {
380    *dst = src;
381    pipe_reference(NULL, &src->reference); /* only increment refcount */
382 }
383 
384 /* Assign src to dst while dst is uninitialized. */
385 static inline void
386 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
387                               struct pipe_vertex_state *src)
388 {
389    *dst = src;
390    pipe_reference(NULL, &src->reference); /* only increment refcount */
391 }
392 
393 /* Unreference dst but don't touch the dst pointer. */
394 static inline void
395 tc_drop_resource_reference(struct pipe_resource *dst)
396 {
397    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
398       pipe_resource_destroy(dst);
399 }
400 
401 /* Unreference dst but don't touch the dst pointer. */
402 static inline void
403 tc_drop_surface_reference(struct pipe_surface *dst)
404 {
405    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
406       dst->context->surface_destroy(dst->context, dst);
407 }
408 
409 /* Unreference dst but don't touch the dst pointer. */
410 static inline void
411 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
412 {
413    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
414       dst->context->stream_output_target_destroy(dst->context, dst);
415 }
416 
417 /**
418  * Subtract the given number of references.
419  */
420 static inline void
421 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
422 {
423    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
424 
425    assert(count >= 0);
426    /* Underflows shouldn't happen, but let's be safe. */
427    if (count <= 0)
428       dst->screen->vertex_state_destroy(dst->screen, dst);
429 }
430 
431 /* We don't want to read or write min_index and max_index, because
432  * they shouldn't be needed by drivers at this point.
433  */
434 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
435    offsetof(struct pipe_draw_info, min_index)
436 
437 ALWAYS_INLINE static struct tc_renderpass_info *
438 incr_rp_info(struct tc_renderpass_info *tc_info)
439 {
440    struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
441    return &info[1].info;
442 }
443 
444 static void
445 tc_begin_next_buffer_list(struct threaded_context *tc)
446 {
447    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
448 
449    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
450 
451    /* Clear the buffer list in the new empty batch. */
452    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
453    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
454    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
455    BITSET_ZERO(buf_list->buffer_list);
456 
457    tc->add_all_gfx_bindings_to_buffer_list = true;
458    tc->add_all_compute_bindings_to_buffer_list = true;
459 }
460 
461 static void
462 tc_add_call_end(struct tc_batch *next)
463 {
464    /* Add a dummy last call that won't be executed, but will indicate the end
465     * of the batch. It's for calls that always look at the next call, and this
466     * stops them from looking farther ahead.
467     */
468    assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
469    struct tc_call_base *call =
470       (struct tc_call_base*)&next->slots[next->num_total_slots];
471    call->call_id = TC_END_BATCH;
472    call->num_slots = 1;
473 #if !defined(NDEBUG) && TC_DEBUG >= 1
474    call->sentinel = TC_SENTINEL;
475 #endif
476 }
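/* Note: this terminator is the reason tc_add_sized_call() and
 * tc_enlarge_last_mergeable_call() compare against TC_SLOTS_PER_BATCH - 1:
 * one slot is always kept free so the TC_END_BATCH marker can be appended
 * when the batch is flushed or executed directly.
 */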
477 
478 static void
479 tc_batch_flush(struct threaded_context *tc, bool full_copy)
480 {
481    struct tc_batch *next = &tc->batch_slots[tc->next];
482    unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
483 
484    tc_assert(next->num_total_slots != 0);
485    tc_add_call_end(next);
486 
487    tc_batch_check(next);
488    tc_debug_check(tc);
489    tc->bytes_mapped_estimate = 0;
490    tc->bytes_replaced_estimate = 0;
491    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
492 
493    if (next->token) {
494       next->token->tc = NULL;
495       tc_unflushed_batch_token_reference(&next->token, NULL);
496    }
497    /* reset renderpass info index for subsequent use */
498    next->renderpass_info_idx = -1;
499 
500    /* always increment renderpass info on batch flush;
501     * renderpass info can only be accessed by its owner batch during execution
502     */
503    if (tc->renderpass_info_recording) {
504       tc->batch_slots[next_id].first_set_fb = full_copy;
505       tc_batch_increment_renderpass_info(tc, next_id, full_copy);
506    }
507 
508    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
509                       NULL, 0);
510    tc->last = tc->next;
511    tc->next = next_id;
512    if (next_id == 0)
513       tc->batch_generation++;
514    tc_begin_next_buffer_list(tc);
515 
516 }
517 
518 /* This is the function that adds variable-sized calls into the current
519  * batch. It also flushes the batch if there is not enough space there.
520  * All other higher-level "add" functions use it.
521  */
522 static void *
523 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
524                   unsigned num_slots)
525 {
526    TC_TRACE_SCOPE(id);
527    struct tc_batch *next = &tc->batch_slots[tc->next];
528    assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
529    tc_debug_check(tc);
530 
531    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
532       /* copy existing renderpass info during flush */
533       tc_batch_flush(tc, true);
534       next = &tc->batch_slots[tc->next];
535       tc_assert(next->num_total_slots == 0);
536       tc_assert(next->last_mergeable_call == NULL);
537    }
538 
539    tc_assert(util_queue_fence_is_signalled(&next->fence));
540 
541    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
542    next->num_total_slots += num_slots;
543 
544 #if !defined(NDEBUG) && TC_DEBUG >= 1
545    call->sentinel = TC_SENTINEL;
546 #endif
547    call->call_id = id;
548    call->num_slots = num_slots;
549 
550 #if TC_DEBUG >= 3
551    tc_printf("ENQUEUE: %s", tc_call_names[id]);
552 #endif
553 
554    tc_debug_check(tc);
555    return call;
556 }
557 
558 #define tc_add_call(tc, execute, type) \
559    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
560 
561 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
562    ((struct type*)tc_add_sized_call(tc, execute, \
563                                     call_size_with_slots(type, num_slots)))
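/* Usage sketch: fixed-size calls are enqueued as
 *
 *    struct tc_query_call *p =
 *       tc_add_call(tc, TC_CALL_destroy_query, tc_query_call);
 *    p->query = query;
 *
 * while calls with a trailing variable-length array use
 * tc_add_slot_based_call() with the element count, as the sampler-state path
 * below does. The matching tc_call_* handler returns the number of slots it
 * consumed so batch execution can step to the next call.
 */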
564 
565 /* Returns the last mergeable call that was added to the unflushed
566  * batch, or NULL if the address of that call is not currently known
567  * or no such call exists in the unflushed batch.
568  */
569 static struct tc_call_base *
570 tc_get_last_mergeable_call(struct threaded_context *tc)
571 {
572    struct tc_batch *batch = &tc->batch_slots[tc->next];
573    struct tc_call_base *call = batch->last_mergeable_call;
574 
575    tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
576 
577    if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
578       return call;
579    else
580       return NULL;
581 }
582 
583 /* Increases the size of the last call in the unflushed batch to the
584  * given number of slots, if possible, without changing the call's data.
585  */
586 static bool
587 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
588 {
589    struct tc_batch *batch = &tc->batch_slots[tc->next];
590    struct tc_call_base *call = tc_get_last_mergeable_call(tc);
591 
592    tc_assert(call);
593    tc_assert(desired_num_slots >= call->num_slots);
594 
595    unsigned added_slots = desired_num_slots - call->num_slots;
596 
597    if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
598       return false;
599 
600    batch->num_total_slots += added_slots;
601    call->num_slots += added_slots;
602 
603    return true;
604 }
605 
606 static void
607 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
608 {
609    struct tc_batch *batch = &tc->batch_slots[tc->next];
610    tc_assert(call->num_slots <= batch->num_total_slots);
611    tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
612    batch->last_mergeable_call = call;
613 }
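/* Illustrative flow: a caller that may want to append data later (for
 * example a subdata-style upload) enqueues its call and then invokes
 * tc_mark_call_mergeable(). If the next enqueue targets the same destination,
 * it can use tc_get_last_mergeable_call() and
 * tc_enlarge_last_mergeable_call() to grow the existing call in place instead
 * of adding a second one, provided nothing else was added to the batch in
 * between (the end-of-batch address check above detects that).
 */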
614 
615 static bool
616 tc_is_sync(struct threaded_context *tc)
617 {
618    struct tc_batch *last = &tc->batch_slots[tc->last];
619    struct tc_batch *next = &tc->batch_slots[tc->next];
620 
621    return util_queue_fence_is_signalled(&last->fence) &&
622           !next->num_total_slots;
623 }
624 
625 static void
626 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
627 {
628    struct tc_batch *last = &tc->batch_slots[tc->last];
629    struct tc_batch *next = &tc->batch_slots[tc->next];
630    bool synced = false;
631 
632    MESA_TRACE_SCOPE(func);
633 
634    tc_debug_check(tc);
635 
636    if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
637       /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
638        * then the current renderpass info MUST be signaled to avoid deadlocking the driver
639        *
640        * this is not a "complete" signal operation, however, as it's unknown what calls may
641        * come after this one, which means that framebuffer attachment data is unreliable
642        *
643        * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
644        * force all attachments active and assume the app was going to get bad perf here anyway
645        */
646       tc_sanitize_renderpass_info(tc);
647    }
648    tc_signal_renderpass_info_ready(tc);
649 
650    /* Only wait for queued calls... */
651    if (!util_queue_fence_is_signalled(&last->fence)) {
652       util_queue_fence_wait(&last->fence);
653       synced = true;
654    }
655 
656    tc_debug_check(tc);
657 
658    if (next->token) {
659       next->token->tc = NULL;
660       tc_unflushed_batch_token_reference(&next->token, NULL);
661    }
662 
663    /* .. and execute unflushed calls directly. */
664    if (next->num_total_slots) {
665       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
666       tc->bytes_mapped_estimate = 0;
667       tc->bytes_replaced_estimate = 0;
668       tc_add_call_end(next);
669       tc_batch_execute(next, NULL, 0);
670       tc_begin_next_buffer_list(tc);
671       synced = true;
672    }
673 
674    if (synced) {
675       p_atomic_inc(&tc->num_syncs);
676 
677       if (tc_strcmp(func, "tc_destroy") != 0) {
678          tc_printf("sync %s %s", func, info);
679       }
680    }
681 
682    tc_debug_check(tc);
683 
684    if (tc->options.parse_renderpass_info) {
685       int renderpass_info_idx = next->renderpass_info_idx;
686       if (renderpass_info_idx > 0) {
687          /* don't reset if fb state is unflushed */
688          bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
689          uint32_t fb_info = tc->renderpass_info_recording->data32[0];
690          next->renderpass_info_idx = -1;
691          tc_batch_increment_renderpass_info(tc, tc->next, false);
692          if (fb_no_draw)
693             tc->renderpass_info_recording->data32[0] = fb_info;
694       } else if (tc->renderpass_info_recording->has_draw) {
695          tc->renderpass_info_recording->data32[0] = 0;
696       }
697       tc->seen_fb_state = false;
698       tc->query_ended = false;
699    }
700 }
701 
702 #define tc_sync(tc) _tc_sync(tc, "", __func__)
703 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
704 
705 /**
706  * Call this from fence_finish for same-context fence waits of deferred fences
707  * that haven't been flushed yet.
708  *
709  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
710  * i.e., the wrapped one.
711  */
712 void
713 threaded_context_flush(struct pipe_context *_pipe,
714                        struct tc_unflushed_batch_token *token,
715                        bool prefer_async)
716 {
717    struct threaded_context *tc = threaded_context(_pipe);
718 
719    /* This is called from the gallium frontend / application thread. */
720    if (token->tc && token->tc == tc) {
721       struct tc_batch *last = &tc->batch_slots[tc->last];
722 
723       /* Prefer to do the flush in the driver thread if it is already
724        * running. That should be better for cache locality.
725        */
726       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
727          tc_batch_flush(tc, false);
728       else
729          tc_sync(token->tc);
730    }
731 }
732 
733 static void
734 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
735 {
736    uint32_t id = threaded_resource(buf)->buffer_id_unique;
737    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
738 }
739 
740 /* Reset a range of buffer binding slots. */
741 static void
742 tc_unbind_buffers(uint32_t *binding, unsigned count)
743 {
744    if (count)
745       memset(binding, 0, sizeof(*binding) * count);
746 }
747 
748 static void
749 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
750                                unsigned count)
751 {
752    for (unsigned i = 0; i < count; i++) {
753       if (bindings[i])
754          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
755    }
756 }
757 
758 static bool
759 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
760                    unsigned count)
761 {
762    unsigned rebind_count = 0;
763 
764    for (unsigned i = 0; i < count; i++) {
765       if (bindings[i] == old_id) {
766          bindings[i] = new_id;
767          rebind_count++;
768       }
769    }
770    return rebind_count;
771 }
772 
773 static void
774 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
775                                       BITSET_WORD *buffer_list,
776                                       enum pipe_shader_type shader)
777 {
778    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
779                                   tc->max_const_buffers);
780    if (tc->seen_shader_buffers[shader]) {
781       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
782                                      tc->max_shader_buffers);
783    }
784    if (tc->seen_image_buffers[shader]) {
785       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
786                                      tc->max_images);
787    }
788    if (tc->seen_sampler_buffers[shader]) {
789       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
790                                      tc->max_samplers);
791    }
792 }
793 
794 static unsigned
795 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
796                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
797 {
798    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
799 
800    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
801                             tc->max_const_buffers);
802    if (ubo)
803       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
804    if (tc->seen_shader_buffers[shader]) {
805       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
806                                 tc->max_shader_buffers);
807       if (ssbo)
808          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
809    }
810    if (tc->seen_image_buffers[shader]) {
811       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
812                                tc->max_images);
813       if (img)
814          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
815    }
816    if (tc->seen_sampler_buffers[shader]) {
817       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
818                                    tc->max_samplers);
819       if (sampler)
820          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
821    }
822    return ubo + ssbo + img + sampler;
823 }
824 
825 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
826  * This is called by the first draw call in a batch when we want to inherit
827  * all bindings set by the previous batch.
828  */
829 static void
830 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
831 {
832    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
833 
834    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
835    if (tc->seen_streamout_buffers)
836       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
837 
838    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
839    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
840 
841    if (tc->seen_tcs)
842       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
843    if (tc->seen_tes)
844       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
845    if (tc->seen_gs)
846       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
847 
848    tc->add_all_gfx_bindings_to_buffer_list = false;
849 }
850 
851 /* Add all bound buffers used by compute to the buffer list.
852  * This is called by the first compute call in a batch when we want to inherit
853  * all bindings set by the previous batch.
854  */
855 static void
856 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
857 {
858    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
859 
860    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
861    tc->add_all_compute_bindings_to_buffer_list = false;
862 }
863 
864 static unsigned
865 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
866 {
867    unsigned vbo = 0, so = 0;
868 
869    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
870                             tc->num_vertex_buffers);
871    if (vbo)
872       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
873 
874    if (tc->seen_streamout_buffers) {
875       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
876                               PIPE_MAX_SO_BUFFERS);
877       if (so)
878          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
879    }
880    unsigned rebound = vbo + so;
881 
882    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
883    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
884 
885    if (tc->seen_tcs)
886       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
887    if (tc->seen_tes)
888       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
889    if (tc->seen_gs)
890       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
891 
892    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
893 
894    if (rebound)
895       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
896    return rebound;
897 }
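/* Context note (hedged): rebinding like this is needed when a buffer's
 * storage is replaced behind the scenes (e.g. on buffer invalidation), which
 * gives the resource a new unique id; every binding table still holding the
 * old id is patched to the new one, and the new id is added to the current
 * buffer list so busy-tracking keeps working.
 */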
898 
899 static bool
900 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
901 {
902    while (binding_mask) {
903       if (bindings[u_bit_scan(&binding_mask)] == id)
904          return true;
905    }
906    return false;
907 }
908 
909 static bool
910 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
911                                     enum pipe_shader_type shader)
912 {
913    if (tc->seen_shader_buffers[shader] &&
914        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
915                                     tc->shader_buffers_writeable_mask[shader]))
916       return true;
917 
918    if (tc->seen_image_buffers[shader] &&
919        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
920                                     tc->image_buffers_writeable_mask[shader]))
921       return true;
922 
923    return false;
924 }
925 
926 static bool
927 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
928 {
929    if (tc->seen_streamout_buffers &&
930        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
931                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
932       return true;
933 
934    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
935        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
936        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
937       return true;
938 
939    if (tc->seen_tcs &&
940        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
941       return true;
942 
943    if (tc->seen_tes &&
944        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
945       return true;
946 
947    if (tc->seen_gs &&
948        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
949       return true;
950 
951    return false;
952 }
953 
954 static bool
955 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
956                   unsigned map_usage)
957 {
958    if (!tc->options.is_resource_busy)
959       return true;
960 
961    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
962 
963    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
964       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
965 
966       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
967        * then the buffer is considered busy. */
968       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
969           BITSET_TEST(buf_list->buffer_list, id_hash))
970          return true;
971    }
972 
973    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
974     * this buffer is busy or not. */
975    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
976 }
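/* Note: buffer_id_unique is folded into the bitset with TC_BUFFER_ID_MASK,
 * so two distinct buffers can alias the same bit. Aliasing can only make an
 * idle buffer look busy (a false positive), never the reverse, which keeps
 * this check conservative.
 */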
977 
978 /**
979  * allow_cpu_storage should be false for user memory and imported buffers.
980  */
981 void
982 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
983 {
984    struct threaded_resource *tres = threaded_resource(res);
985 
986    tres->latest = &tres->b;
987    tres->cpu_storage = NULL;
988    util_range_init(&tres->valid_buffer_range);
989    tres->is_shared = false;
990    tres->is_user_ptr = false;
991    tres->buffer_id_unique = 0;
992    tres->pending_staging_uploads = 0;
993    tres->last_batch_usage = -1;
994    util_range_init(&tres->pending_staging_uploads_range);
995 
996    if (allow_cpu_storage &&
997        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
998                        PIPE_RESOURCE_FLAG_SPARSE |
999                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1000        /* We need buffer invalidation and buffer busyness tracking for the CPU
1001         * storage, which aren't supported with pipe_vertex_state. */
1002        !(res->bind & PIPE_BIND_VERTEX_STATE))
1003       tres->allow_cpu_storage = true;
1004    else
1005       tres->allow_cpu_storage = false;
1006 }
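/* Usage sketch (hypothetical driver code, not part of this file): a driver
 * that supports threaded_context typically calls this from its
 * resource_create hook, e.g.
 *
 *    struct my_resource *res = CALLOC_STRUCT(my_resource);
 *    // ... fill in the embedded pipe_resource ...
 *    threaded_resource_init(&res->base, !is_user_memory && !is_imported);
 *
 * where my_resource, is_user_memory and is_imported are made-up names, and
 * calls threaded_resource_deinit() from its resource_destroy hook.
 */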
1007 
1008 void
1009 threaded_resource_deinit(struct pipe_resource *res)
1010 {
1011    struct threaded_resource *tres = threaded_resource(res);
1012 
1013    if (tres->latest != &tres->b)
1014       pipe_resource_reference(&tres->latest, NULL);
1015    util_range_destroy(&tres->valid_buffer_range);
1016    util_range_destroy(&tres->pending_staging_uploads_range);
1017    align_free(tres->cpu_storage);
1018 }
1019 
1020 struct pipe_context *
1021 threaded_context_unwrap_sync(struct pipe_context *pipe)
1022 {
1023    if (!pipe || !pipe->priv)
1024       return pipe;
1025 
1026    tc_sync(threaded_context(pipe));
1027    return (struct pipe_context*)pipe->priv;
1028 }
1029 
1030 
1031 /********************************************************************
1032  * simple functions
1033  */
1034 
1035 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1036    struct tc_call_##func { \
1037       struct tc_call_base base; \
1038       type state; \
1039    }; \
1040    \
1041    static uint16_t ALWAYS_INLINE \
1042    tc_call_##func(struct pipe_context *pipe, void *call) \
1043    { \
1044       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1045       return call_size(tc_call_##func); \
1046    } \
1047    \
1048    static void \
1049    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1050    { \
1051       struct threaded_context *tc = threaded_context(_pipe); \
1052       struct tc_call_##func *p = (struct tc_call_##func*) \
1053                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1054       p->state = deref(param); \
1055       __VA_ARGS__; \
1056    }
1057 
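/* Illustrative expansion: TC_FUNC1(set_sample_mask, , unsigned, , ) below
 * generates struct tc_call_set_sample_mask { struct tc_call_base base;
 * unsigned state; }, a tc_call_set_sample_mask() handler that forwards the
 * stored value to pipe->set_sample_mask() on the driver thread, and a
 * tc_set_sample_mask() entry point that merely records the value in the
 * current batch.
 */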
1058 TC_FUNC1(set_active_query_state, , bool, , )
1059 
1060 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1061 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1062 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1063 TC_FUNC1(set_sample_mask, , unsigned, , )
1064 TC_FUNC1(set_min_samples, , unsigned, , )
1065 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1066 
1067 TC_FUNC1(texture_barrier, , unsigned, , )
1068 TC_FUNC1(memory_barrier, , unsigned, , )
1069 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1070 TC_FUNC1(delete_image_handle, , uint64_t, , )
1071 TC_FUNC1(set_frontend_noop, , bool, , )
1072 
1073 
1074 /********************************************************************
1075  * queries
1076  */
1077 
1078 static struct pipe_query *
1079 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1080                 unsigned index)
1081 {
1082    struct threaded_context *tc = threaded_context(_pipe);
1083    struct pipe_context *pipe = tc->pipe;
1084 
1085    return pipe->create_query(pipe, query_type, index);
1086 }
1087 
1088 static struct pipe_query *
1089 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1090                       unsigned *query_types)
1091 {
1092    struct threaded_context *tc = threaded_context(_pipe);
1093    struct pipe_context *pipe = tc->pipe;
1094 
1095    return pipe->create_batch_query(pipe, num_queries, query_types);
1096 }
1097 
1098 struct tc_query_call {
1099    struct tc_call_base base;
1100    struct pipe_query *query;
1101 };
1102 
1103 static uint16_t ALWAYS_INLINE
1104 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1105 {
1106    struct pipe_query *query = to_call(call, tc_query_call)->query;
1107    struct threaded_query *tq = threaded_query(query);
1108 
1109    if (list_is_linked(&tq->head_unflushed))
1110       list_del(&tq->head_unflushed);
1111 
1112    pipe->destroy_query(pipe, query);
1113    return call_size(tc_query_call);
1114 }
1115 
1116 static void
1117 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1118 {
1119    struct threaded_context *tc = threaded_context(_pipe);
1120 
1121    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1122 }
1123 
1124 static uint16_t ALWAYS_INLINE
1125 tc_call_begin_query(struct pipe_context *pipe, void *call)
1126 {
1127    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1128    return call_size(tc_query_call);
1129 }
1130 
1131 static bool
1132 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1133 {
1134    struct threaded_context *tc = threaded_context(_pipe);
1135    tc->num_queries_active++;
1136 
1137    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1138    return true; /* we don't care about the return value for this call */
1139 }
1140 
1141 struct tc_end_query_call {
1142    struct tc_call_base base;
1143    struct threaded_context *tc;
1144    struct pipe_query *query;
1145 };
1146 
1147 static uint16_t ALWAYS_INLINE
1148 tc_call_end_query(struct pipe_context *pipe, void *call)
1149 {
1150    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1151    struct threaded_query *tq = threaded_query(p->query);
1152 
1153    if (!list_is_linked(&tq->head_unflushed))
1154       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1155 
1156    pipe->end_query(pipe, p->query);
1157    return call_size(tc_end_query_call);
1158 }
1159 
1160 static bool
1161 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1162 {
1163    struct threaded_context *tc = threaded_context(_pipe);
1164    struct threaded_query *tq = threaded_query(query);
1165    struct tc_end_query_call *call =
1166       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1167    tc->num_queries_active--;
1168 
1169    call->tc = tc;
1170    call->query = query;
1171 
1172    tq->flushed = false;
1173    tc->query_ended = true;
1174 
1175    return true; /* we don't care about the return value for this call */
1176 }
1177 
1178 static bool
1179 tc_get_query_result(struct pipe_context *_pipe,
1180                     struct pipe_query *query, bool wait,
1181                     union pipe_query_result *result)
1182 {
1183    struct threaded_context *tc = threaded_context(_pipe);
1184    struct threaded_query *tq = threaded_query(query);
1185    struct pipe_context *pipe = tc->pipe;
1186    bool flushed = tq->flushed;
1187 
1188    if (!flushed) {
1189       tc_sync_msg(tc, wait ? "wait" : "nowait");
1190       tc_set_driver_thread(tc);
1191    }
1192 
1193    bool success = pipe->get_query_result(pipe, query, wait, result);
1194 
1195    if (!flushed)
1196       tc_clear_driver_thread(tc);
1197 
1198    if (success) {
1199       tq->flushed = true;
1200       if (list_is_linked(&tq->head_unflushed)) {
1201          /* This is safe because it can only happen after we sync'd. */
1202          list_del(&tq->head_unflushed);
1203       }
1204    }
1205    return success;
1206 }
1207 
1208 struct tc_query_result_resource {
1209    struct tc_call_base base;
1210    enum pipe_query_flags flags:8;
1211    enum pipe_query_value_type result_type:8;
1212    int8_t index; /* it can be -1 */
1213    unsigned offset;
1214    struct pipe_query *query;
1215    struct pipe_resource *resource;
1216 };
1217 
1218 static uint16_t ALWAYS_INLINE
1219 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1220 {
1221    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1222 
1223    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1224                                    p->index, p->resource, p->offset);
1225    tc_drop_resource_reference(p->resource);
1226    return call_size(tc_query_result_resource);
1227 }
1228 
1229 static void
1230 tc_get_query_result_resource(struct pipe_context *_pipe,
1231                              struct pipe_query *query,
1232                              enum pipe_query_flags flags,
1233                              enum pipe_query_value_type result_type, int index,
1234                              struct pipe_resource *resource, unsigned offset)
1235 {
1236    struct threaded_context *tc = threaded_context(_pipe);
1237 
1238    tc_buffer_disable_cpu_storage(resource);
1239 
1240    struct tc_query_result_resource *p =
1241       tc_add_call(tc, TC_CALL_get_query_result_resource,
1242                   tc_query_result_resource);
1243    p->query = query;
1244    p->flags = flags;
1245    p->result_type = result_type;
1246    p->index = index;
1247    tc_set_resource_reference(&p->resource, resource);
1248    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1249    p->offset = offset;
1250 }
1251 
1252 struct tc_render_condition {
1253    struct tc_call_base base;
1254    bool condition;
1255    unsigned mode;
1256    struct pipe_query *query;
1257 };
1258 
1259 static uint16_t ALWAYS_INLINE
1260 tc_call_render_condition(struct pipe_context *pipe, void *call)
1261 {
1262    struct tc_render_condition *p = to_call(call, tc_render_condition);
1263    pipe->render_condition(pipe, p->query, p->condition, p->mode);
1264    return call_size(tc_render_condition);
1265 }
1266 
1267 static void
1268 tc_render_condition(struct pipe_context *_pipe,
1269                     struct pipe_query *query, bool condition,
1270                     enum pipe_render_cond_flag mode)
1271 {
1272    struct threaded_context *tc = threaded_context(_pipe);
1273    struct tc_render_condition *p =
1274       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1275 
1276    p->query = query;
1277    p->condition = condition;
1278    p->mode = mode;
1279 }
1280 
1281 
1282 /********************************************************************
1283  * constant (immutable) states
1284  */
1285 
1286 #define TC_CSO_CREATE(name, sname) \
1287    static void * \
1288    tc_create_##name##_state(struct pipe_context *_pipe, \
1289                             const struct pipe_##sname##_state *state) \
1290    { \
1291       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1292       return pipe->create_##name##_state(pipe, state); \
1293    }
1294 
1295 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1296 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1297 
1298 #define TC_CSO(name, sname, ...) \
1299    TC_CSO_CREATE(name, sname) \
1300    TC_CSO_BIND(name, ##__VA_ARGS__) \
1301    TC_CSO_DELETE(name)
1302 
1303 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1304 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1305 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1306 
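/* Illustrative expansion: TC_CSO_WHOLE(blend) below produces
 * tc_create_blend_state(), which forwards directly to the driver (creation
 * presumably doesn't touch the currently bound context state), plus
 * asynchronous tc_bind_blend_state() and tc_delete_blend_state() wrappers
 * built on TC_FUNC1. TC_CSO_SHADER_TRACK(gs) additionally sets
 * tc->seen_gs = true at bind time so the buffer-list and rebind logic above
 * knows the stage has been used.
 */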
1307 TC_CSO_WHOLE(blend)
1308 TC_CSO_WHOLE(rasterizer)
1309 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1310 TC_CSO_BIND(depth_stencil_alpha,
1311    if (param && tc->options.parse_renderpass_info) {
1312       /* dsa info is only ever added during a renderpass;
1313        * changes outside of a renderpass reset the data
1314        */
1315       if (!tc->in_renderpass) {
1316          tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1317          tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1318       }
1319       /* let the driver parse its own state */
1320       tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1321    }
1322 )
1323 TC_CSO_DELETE(depth_stencil_alpha)
1324 TC_CSO_WHOLE(compute)
1325 TC_CSO_CREATE(fs, shader)
1326 TC_CSO_BIND(fs,
1327    if (param && tc->options.parse_renderpass_info) {
1328       /* fs info is only ever added during a renderpass;
1329        * changes outside of a renderpass reset the data
1330        */
1331       if (!tc->in_renderpass) {
1332          tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1333          tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1334       }
1335       /* let the driver parse its own state */
1336       tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1337    }
1338 )
1339 TC_CSO_DELETE(fs)
1340 TC_CSO_SHADER(vs)
1341 TC_CSO_SHADER_TRACK(gs)
1342 TC_CSO_SHADER_TRACK(tcs)
1343 TC_CSO_SHADER_TRACK(tes)
1344 TC_CSO_CREATE(sampler, sampler)
1345 TC_CSO_DELETE(sampler)
1346 TC_CSO_BIND(vertex_elements)
1347 TC_CSO_DELETE(vertex_elements)
1348 
1349 static void *
1350 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1351                                 const struct pipe_vertex_element *elems)
1352 {
1353    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1354 
1355    return pipe->create_vertex_elements_state(pipe, count, elems);
1356 }
1357 
1358 struct tc_sampler_states {
1359    struct tc_call_base base;
1360    uint8_t shader, start, count;
1361    void *slot[0]; /* more will be allocated if needed */
1362 };
1363 
1364 static uint16_t ALWAYS_INLINE
1365 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1366 {
1367    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1368 
1369    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1370    return p->base.num_slots;
1371 }
1372 
1373 static void
1374 tc_bind_sampler_states(struct pipe_context *_pipe,
1375                        enum pipe_shader_type shader,
1376                        unsigned start, unsigned count, void **states)
1377 {
1378    if (!count)
1379       return;
1380 
1381    struct threaded_context *tc = threaded_context(_pipe);
1382    struct tc_sampler_states *p =
1383       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1384 
1385    p->shader = shader;
1386    p->start = start;
1387    p->count = count;
1388    memcpy(p->slot, states, count * sizeof(states[0]));
1389 }
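/* Editorial sketch: the slot-based call pattern used by tc_bind_sampler_states()
 * above and by most variable-length calls below.  tc_add_slot_based_call()
 * reserves enough batch slots to hold the call header plus the trailing
 * flexible array (here tc_sampler_states::slot), and the execute-side handler
 * returns p->base.num_slots so the batch dispatcher knows how far to advance.
 * The exact slot size is defined in u_threaded_context.h and is not shown in
 * this excerpt; the shape of the pattern is:
 *
 *    struct tc_sampler_states *p =
 *       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states,
 *                              tc_sampler_states, count);
 *    memcpy(p->slot, states, count * sizeof(states[0]));  // variable payload
 *    ...
 *    return p->base.num_slots;  // executor advances by the same slot count
 */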
1390 
1391 static void
1392 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1393 {
1394    struct threaded_context *tc = threaded_context(_pipe);
1395    tc->pipe->link_shader(tc->pipe, shaders);
1396 }
1397 /********************************************************************
1398  * immediate states
1399  */
1400 
1401 struct tc_framebuffer {
1402    struct tc_call_base base;
1403    struct pipe_framebuffer_state state;
1404 };
1405 
1406 static uint16_t ALWAYS_INLINE
1407 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1408 {
1409    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1410 
1411    pipe->set_framebuffer_state(pipe, p);
1412 
1413    unsigned nr_cbufs = p->nr_cbufs;
1414    for (unsigned i = 0; i < nr_cbufs; i++)
1415       tc_drop_surface_reference(p->cbufs[i]);
1416    tc_drop_surface_reference(p->zsbuf);
1417    tc_drop_resource_reference(p->resolve);
1418    return call_size(tc_framebuffer);
1419 }
1420 
1421 static void
1422 tc_set_framebuffer_state(struct pipe_context *_pipe,
1423                          const struct pipe_framebuffer_state *fb)
1424 {
1425    struct threaded_context *tc = threaded_context(_pipe);
1426    struct tc_framebuffer *p =
1427       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1428    unsigned nr_cbufs = fb->nr_cbufs;
1429 
1430    p->state.width = fb->width;
1431    p->state.height = fb->height;
1432    p->state.samples = fb->samples;
1433    p->state.layers = fb->layers;
1434    p->state.nr_cbufs = nr_cbufs;
1435    p->state.viewmask = fb->viewmask;
1436 
1437    /* when unbinding, mark attachments as used for the current batch */
1438    for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1439       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1440       pipe_resource_reference(&tc->fb_resources[i], NULL);
1441    }
1442    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1443    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1444 
1445    for (unsigned i = 0; i < nr_cbufs; i++) {
1446       p->state.cbufs[i] = NULL;
1447       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1448       /* full tracking requires storing the fb attachment resources */
1449       if (fb->cbufs[i])
1450          pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1451       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1452    }
1453    tc->nr_cbufs = nr_cbufs;
1454    if (tc->options.parse_renderpass_info) {
1455       /* ensure this is treated as the first fb set if no fb activity has occurred */
1456       if (!tc->renderpass_info_recording->has_draw &&
1457           !tc->renderpass_info_recording->cbuf_clear &&
1458           !tc->renderpass_info_recording->cbuf_load &&
1459           !tc->renderpass_info_recording->zsbuf_load &&
1460           !tc->renderpass_info_recording->zsbuf_clear_partial)
1461          tc->batch_slots[tc->next].first_set_fb = false;
1462       /* store existing zsbuf data for possible persistence */
1463       uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1464                       0 :
1465                       tc->renderpass_info_recording->data8[3];
1466       bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1467                            (fb->zsbuf ? fb->zsbuf->texture : NULL);
1468 
1469       if (tc->seen_fb_state) {
1470          /* this is the end of a renderpass, so increment the renderpass info */
1471          tc_batch_increment_renderpass_info(tc, tc->next, false);
1472          /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1473           * keep zsbuf usage data
1474           */
1475          if (!zsbuf_changed)
1476             tc->renderpass_info_recording->data8[3] = zsbuf;
1477       } else {
1478          /* this is the first time a set_framebuffer_state call is triggered;
1479           * just increment the index and keep using the existing info for recording
1480           */
1481          tc->batch_slots[tc->next].renderpass_info_idx = 0;
1482       }
1483       /* future fb state changes will increment the index */
1484       tc->seen_fb_state = true;
1485    }
1486    pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1487                            fb->zsbuf ? fb->zsbuf->texture : NULL);
1488    pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1489    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1490    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1491    tc->in_renderpass = false;
1492    p->state.zsbuf = NULL;
1493    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1494    p->state.resolve = NULL;
1495    pipe_resource_reference(&p->state.resolve, fb->resolve);
1496 }
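/* Editorial note: a rough worked example of the renderpass-info bookkeeping
 * above, assuming tc->options.parse_renderpass_info is enabled.  If a
 * framebuffer with the same zsbuf is bound again before any draw (e.g. only
 * a color attachment changed), seen_fb_state is already true, so the
 * recording index advances to a new renderpass info, and the zsbuf usage
 * byte (data8[3]) saved from the previous info is carried over so it isn't
 * lost.  On the very first set_framebuffer_state (seen_fb_state still
 * false), the existing info keeps being used and only later framebuffer
 * changes start a new one.
 */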
1497 
1498 struct tc_tess_state {
1499    struct tc_call_base base;
1500    float state[6];
1501 };
1502 
1503 static uint16_t ALWAYS_INLINE
1504 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1505 {
1506    float *p = to_call(call, tc_tess_state)->state;
1507 
1508    pipe->set_tess_state(pipe, p, p + 4);
1509    return call_size(tc_tess_state);
1510 }
1511 
1512 static void
1513 tc_set_tess_state(struct pipe_context *_pipe,
1514                   const float default_outer_level[4],
1515                   const float default_inner_level[2])
1516 {
1517    struct threaded_context *tc = threaded_context(_pipe);
1518    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1519 
1520    memcpy(p, default_outer_level, 4 * sizeof(float));
1521    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1522 }
1523 
1524 struct tc_patch_vertices {
1525    struct tc_call_base base;
1526    uint8_t patch_vertices;
1527 };
1528 
1529 static uint16_t ALWAYS_INLINE
1530 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1531 {
1532    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1533 
1534    pipe->set_patch_vertices(pipe, patch_vertices);
1535    return call_size(tc_patch_vertices);
1536 }
1537 
1538 static void
1539 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1540 {
1541    struct threaded_context *tc = threaded_context(_pipe);
1542 
1543    tc_add_call(tc, TC_CALL_set_patch_vertices,
1544                tc_patch_vertices)->patch_vertices = patch_vertices;
1545 }
1546 
1547 struct tc_constant_buffer_base {
1548    struct tc_call_base base;
1549    uint8_t shader, index;
1550    bool is_null;
1551 };
1552 
1553 struct tc_constant_buffer {
1554    struct tc_constant_buffer_base base;
1555    struct pipe_constant_buffer cb;
1556 };
1557 
1558 static uint16_t ALWAYS_INLINE
1559 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1560 {
1561    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1562 
1563    if (unlikely(p->base.is_null)) {
1564       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1565       return call_size(tc_constant_buffer_base);
1566    }
1567 
1568    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1569    return call_size(tc_constant_buffer);
1570 }
1571 
1572 static void
1573 tc_set_constant_buffer(struct pipe_context *_pipe,
1574                        enum pipe_shader_type shader, uint index,
1575                        bool take_ownership,
1576                        const struct pipe_constant_buffer *cb)
1577 {
1578    struct threaded_context *tc = threaded_context(_pipe);
1579 
1580    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1581       struct tc_constant_buffer_base *p =
1582          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1583       p->shader = shader;
1584       p->index = index;
1585       p->is_null = true;
1586       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1587       return;
1588    }
1589 
1590    struct pipe_resource *buffer;
1591    unsigned offset;
1592 
1593    if (cb->user_buffer) {
1594       /* The upload must happen before set_constant_buffer is added, because
1595        * it could generate e.g. transfer_unmap and flush a partially
1596        * uninitialized set_constant_buffer to the driver if done afterwards.
1597        */
1598       buffer = NULL;
1599       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1600                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1601       u_upload_unmap(tc->base.const_uploader);
1602       take_ownership = true;
1603    } else {
1604       buffer = cb->buffer;
1605       offset = cb->buffer_offset;
1606    }
1607 
1608    struct tc_constant_buffer *p =
1609       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1610    p->base.shader = shader;
1611    p->base.index = index;
1612    p->base.is_null = false;
1613    p->cb.user_buffer = NULL;
1614    p->cb.buffer_offset = offset;
1615    p->cb.buffer_size = cb->buffer_size;
1616 
1617    if (take_ownership)
1618       p->cb.buffer = buffer;
1619    else
1620       tc_set_resource_reference(&p->cb.buffer, buffer);
1621 
1622    if (buffer) {
1623       tc_bind_buffer(&tc->const_buffers[shader][index],
1624                      &tc->buffer_lists[tc->next_buf_list], buffer);
1625    } else {
1626       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1627    }
1628 }
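/* Editorial sketch: the two shapes a set_constant_buffer call takes in the
 * batch.  Unbinding only needs the small tc_constant_buffer_base header
 * (is_null = true), while a real binding appends a full pipe_constant_buffer;
 * tc_call_set_constant_buffer() returns the matching size so the dispatcher
 * advances correctly.  User-pointer constants never reach the driver
 * directly; they are first copied into const_uploader storage, roughly:
 *
 *    u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
 *                  tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
 *    // 'buffer' now references the uploaded copy, so ownership is taken
 *    // by the queued call instead of keeping the user pointer around.
 */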
1629 
1630 struct tc_inlinable_constants {
1631    struct tc_call_base base;
1632    uint8_t shader;
1633    uint8_t num_values;
1634    uint32_t values[MAX_INLINABLE_UNIFORMS];
1635 };
1636 
1637 static uint16_t ALWAYS_INLINE
1638 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1639 {
1640    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1641 
1642    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1643    return call_size(tc_inlinable_constants);
1644 }
1645 
1646 static void
1647 tc_set_inlinable_constants(struct pipe_context *_pipe,
1648                            enum pipe_shader_type shader,
1649                            uint num_values, uint32_t *values)
1650 {
1651    struct threaded_context *tc = threaded_context(_pipe);
1652    struct tc_inlinable_constants *p =
1653       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1654    p->shader = shader;
1655    p->num_values = num_values;
1656    memcpy(p->values, values, num_values * 4);
1657 }
1658 
1659 struct tc_sample_locations {
1660    struct tc_call_base base;
1661    uint16_t size;
1662    uint8_t slot[0];
1663 };
1664 
1665 
1666 static uint16_t ALWAYS_INLINE
1667 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1668 {
1669    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1670 
1671    pipe->set_sample_locations(pipe, p->size, p->slot);
1672    return p->base.num_slots;
1673 }
1674 
1675 static void
1676 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1677 {
1678    struct threaded_context *tc = threaded_context(_pipe);
1679    struct tc_sample_locations *p =
1680       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1681                              tc_sample_locations, size);
1682 
1683    p->size = size;
1684    memcpy(p->slot, locations, size);
1685 }
1686 
1687 struct tc_scissors {
1688    struct tc_call_base base;
1689    uint8_t start, count;
1690    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1691 };
1692 
1693 static uint16_t ALWAYS_INLINE
1694 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1695 {
1696    struct tc_scissors *p = (struct tc_scissors *)call;
1697 
1698    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1699    return p->base.num_slots;
1700 }
1701 
1702 static void
1703 tc_set_scissor_states(struct pipe_context *_pipe,
1704                       unsigned start, unsigned count,
1705                       const struct pipe_scissor_state *states)
1706 {
1707    struct threaded_context *tc = threaded_context(_pipe);
1708    struct tc_scissors *p =
1709       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1710 
1711    p->start = start;
1712    p->count = count;
1713    memcpy(&p->slot, states, count * sizeof(states[0]));
1714 }
1715 
1716 struct tc_viewports {
1717    struct tc_call_base base;
1718    uint8_t start, count;
1719    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1720 };
1721 
1722 static uint16_t ALWAYS_INLINE
1723 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1724 {
1725    struct tc_viewports *p = (struct tc_viewports *)call;
1726 
1727    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1728    return p->base.num_slots;
1729 }
1730 
1731 static void
1732 tc_set_viewport_states(struct pipe_context *_pipe,
1733                        unsigned start, unsigned count,
1734                        const struct pipe_viewport_state *states)
1735 {
1736    if (!count)
1737       return;
1738 
1739    struct threaded_context *tc = threaded_context(_pipe);
1740    struct tc_viewports *p =
1741       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1742 
1743    p->start = start;
1744    p->count = count;
1745    memcpy(&p->slot, states, count * sizeof(states[0]));
1746 }
1747 
1748 struct tc_window_rects {
1749    struct tc_call_base base;
1750    bool include;
1751    uint8_t count;
1752    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1753 };
1754 
1755 static uint16_t ALWAYS_INLINE
1756 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1757 {
1758    struct tc_window_rects *p = (struct tc_window_rects *)call;
1759 
1760    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1761    return p->base.num_slots;
1762 }
1763 
1764 static void
1765 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1766                          unsigned count,
1767                          const struct pipe_scissor_state *rects)
1768 {
1769    struct threaded_context *tc = threaded_context(_pipe);
1770    struct tc_window_rects *p =
1771       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1772 
1773    p->include = include;
1774    p->count = count;
1775    memcpy(p->slot, rects, count * sizeof(rects[0]));
1776 }
1777 
1778 struct tc_sampler_views {
1779    struct tc_call_base base;
1780    uint8_t shader, start, count, unbind_num_trailing_slots;
1781    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1782 };
1783 
1784 static uint16_t ALWAYS_INLINE
1785 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1786 {
1787    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1788 
1789    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1790                            p->unbind_num_trailing_slots, true, p->slot);
1791    return p->base.num_slots;
1792 }
1793 
1794 static void
1795 tc_set_sampler_views(struct pipe_context *_pipe,
1796                      enum pipe_shader_type shader,
1797                      unsigned start, unsigned count,
1798                      unsigned unbind_num_trailing_slots, bool take_ownership,
1799                      struct pipe_sampler_view **views)
1800 {
1801    if (!count && !unbind_num_trailing_slots)
1802       return;
1803 
1804    struct threaded_context *tc = threaded_context(_pipe);
1805    struct tc_sampler_views *p =
1806       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1807                              views ? count : 0);
1808 
1809    p->shader = shader;
1810    p->start = start;
1811 
1812    if (views) {
1813       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1814 
1815       p->count = count;
1816       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1817 
1818       if (take_ownership) {
1819          memcpy(p->slot, views, sizeof(*views) * count);
1820 
1821          for (unsigned i = 0; i < count; i++) {
1822             if (views[i]) {
1823                if (views[i]->target == PIPE_BUFFER)
1824                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1825                                  views[i]->texture);
1826                else
1827                   tc_set_resource_batch_usage(tc, views[i]->texture);
1828             } else {
1829                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1830             }
1831          }
1832       } else {
1833          for (unsigned i = 0; i < count; i++) {
1834             p->slot[i] = NULL;
1835             pipe_sampler_view_reference(&p->slot[i], views[i]);
1836 
1837             if (views[i]) {
1838                if (views[i]->target == PIPE_BUFFER)
1839                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1840                                  views[i]->texture);
1841                else
1842                   tc_set_resource_batch_usage(tc, views[i]->texture);
1843             } else {
1844                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1845             }
1846          }
1847       }
1848 
1849       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1850                         unbind_num_trailing_slots);
1851       tc->seen_sampler_buffers[shader] = true;
1852    } else {
1853       p->count = 0;
1854       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1855 
1856       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1857                         count + unbind_num_trailing_slots);
1858    }
1859 }
1860 
1861 struct tc_shader_images {
1862    struct tc_call_base base;
1863    uint8_t shader, start, count;
1864    uint8_t unbind_num_trailing_slots;
1865    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1866 };
1867 
1868 static uint16_t ALWAYS_INLINE
1869 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1870 {
1871    struct tc_shader_images *p = (struct tc_shader_images *)call;
1872    unsigned count = p->count;
1873 
1874    if (!p->count) {
1875       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1876                               p->unbind_num_trailing_slots, NULL);
1877       return call_size(tc_shader_images);
1878    }
1879 
1880    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1881                            p->unbind_num_trailing_slots, p->slot);
1882 
1883    for (unsigned i = 0; i < count; i++)
1884       tc_drop_resource_reference(p->slot[i].resource);
1885 
1886    return p->base.num_slots;
1887 }
1888 
1889 static void
1890 tc_set_shader_images(struct pipe_context *_pipe,
1891                      enum pipe_shader_type shader,
1892                      unsigned start, unsigned count,
1893                      unsigned unbind_num_trailing_slots,
1894                      const struct pipe_image_view *images)
1895 {
1896    if (!count && !unbind_num_trailing_slots)
1897       return;
1898 
1899    struct threaded_context *tc = threaded_context(_pipe);
1900    struct tc_shader_images *p =
1901       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1902                              images ? count : 0);
1903    unsigned writable_buffers = 0;
1904 
1905    p->shader = shader;
1906    p->start = start;
1907 
1908    if (images) {
1909       p->count = count;
1910       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1911 
1912       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1913 
1914       for (unsigned i = 0; i < count; i++) {
1915          struct pipe_resource *resource = images[i].resource;
1916 
1917          tc_set_resource_reference(&p->slot[i].resource, resource);
1918 
1919          if (resource) {
1920             if (resource->target == PIPE_BUFFER) {
1921                tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
1922 
1923                if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1924                   struct threaded_resource *tres = threaded_resource(resource);
1925 
1926                   tc_buffer_disable_cpu_storage(resource);
1927                   util_range_add(&tres->b, &tres->valid_buffer_range,
1928                                  images[i].u.buf.offset,
1929                                  images[i].u.buf.offset + images[i].u.buf.size);
1930                   writable_buffers |= BITFIELD_BIT(start + i);
1931                }
1932             } else {
1933                tc_set_resource_batch_usage(tc, resource);
1934             }
1935          } else {
1936             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1937          }
1938       }
1939       memcpy(p->slot, images, count * sizeof(images[0]));
1940 
1941       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1942                         unbind_num_trailing_slots);
1943       tc->seen_image_buffers[shader] = true;
1944    } else {
1945       p->count = 0;
1946       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1947 
1948       tc_unbind_buffers(&tc->image_buffers[shader][start],
1949                         count + unbind_num_trailing_slots);
1950    }
1951 
1952    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1953    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1954 }
1955 
1956 struct tc_shader_buffers {
1957    struct tc_call_base base;
1958    uint8_t shader, start, count;
1959    bool unbind;
1960    unsigned writable_bitmask;
1961    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1962 };
1963 
1964 static uint16_t ALWAYS_INLINE
1965 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
1966 {
1967    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1968    unsigned count = p->count;
1969 
1970    if (p->unbind) {
1971       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1972       return call_size(tc_shader_buffers);
1973    }
1974 
1975    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1976                             p->writable_bitmask);
1977 
1978    for (unsigned i = 0; i < count; i++)
1979       tc_drop_resource_reference(p->slot[i].buffer);
1980 
1981    return p->base.num_slots;
1982 }
1983 
1984 static void
1985 tc_set_shader_buffers(struct pipe_context *_pipe,
1986                       enum pipe_shader_type shader,
1987                       unsigned start, unsigned count,
1988                       const struct pipe_shader_buffer *buffers,
1989                       unsigned writable_bitmask)
1990 {
1991    if (!count)
1992       return;
1993 
1994    struct threaded_context *tc = threaded_context(_pipe);
1995    struct tc_shader_buffers *p =
1996       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1997                              buffers ? count : 0);
1998 
1999    p->shader = shader;
2000    p->start = start;
2001    p->count = count;
2002    p->unbind = buffers == NULL;
2003    p->writable_bitmask = writable_bitmask;
2004 
2005    if (buffers) {
2006       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2007 
2008       for (unsigned i = 0; i < count; i++) {
2009          struct pipe_shader_buffer *dst = &p->slot[i];
2010          const struct pipe_shader_buffer *src = buffers + i;
2011 
2012          tc_set_resource_reference(&dst->buffer, src->buffer);
2013          dst->buffer_offset = src->buffer_offset;
2014          dst->buffer_size = src->buffer_size;
2015 
2016          if (src->buffer) {
2017             struct threaded_resource *tres = threaded_resource(src->buffer);
2018 
2019             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2020 
2021             if (writable_bitmask & BITFIELD_BIT(i)) {
2022                tc_buffer_disable_cpu_storage(src->buffer);
2023                util_range_add(&tres->b, &tres->valid_buffer_range,
2024                               src->buffer_offset,
2025                               src->buffer_offset + src->buffer_size);
2026             }
2027          } else {
2028             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2029          }
2030       }
2031       tc->seen_shader_buffers[shader] = true;
2032    } else {
2033       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2034    }
2035 
2036    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2037    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2038 }
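/* Editorial note: a small worked example of the writeable-mask update above.
 * With start = 2, count = 3 and writable_bitmask = 0b101, the old bits 2..4
 * are cleared by ~BITFIELD_RANGE(2, 3) and then bits 2 and 4 are set by
 * (0b101 << 2), i.e. the buffers in slots 2 and 4 are tracked as
 * GPU-writable while slot 3 becomes read-only.
 */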
2039 
2040 static uint16_t ALWAYS_INLINE
2041 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2042 {
2043    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2044    unsigned count = p->count;
2045 
2046    for (unsigned i = 0; i < count; i++)
2047       tc_assert(!p->slot[i].is_user_buffer);
2048 
2049    pipe->set_vertex_buffers(pipe, count, p->slot);
2050    return p->base.num_slots;
2051 }
2052 
2053 static void
2054 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2055                       const struct pipe_vertex_buffer *buffers)
2056 {
2057    struct threaded_context *tc = threaded_context(_pipe);
2058 
2059    assert(!count || buffers);
2060 
2061    if (count) {
2062       struct tc_vertex_buffers *p =
2063          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2064       p->count = count;
2065 
2066       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2067 
2068       memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2069 
2070       for (unsigned i = 0; i < count; i++) {
2071          struct pipe_resource *buf = buffers[i].buffer.resource;
2072 
2073          if (buf) {
2074             tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2075          } else {
2076             tc_unbind_buffer(&tc->vertex_buffers[i]);
2077          }
2078       }
2079    } else {
2080       struct tc_vertex_buffers *p =
2081          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2082       p->count = 0;
2083    }
2084 
2085    /* We don't need to unbind trailing buffers because we never touch bindings
2086     * after num_vertex_buffers.
2087     */
2088    tc->num_vertex_buffers = count;
2089 }
2090 
2091 struct pipe_vertex_buffer *
2092 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2093 {
2094    struct threaded_context *tc = threaded_context(_pipe);
2095 
2096    /* We don't need to unbind trailing buffers because we never touch bindings
2097     * after num_vertex_buffers.
2098     */
2099    tc->num_vertex_buffers = count;
2100 
2101    struct tc_vertex_buffers *p =
2102       tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2103    p->count = count;
2104    return p->slot;
2105 }
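/* Editorial sketch: expected use of tc_add_set_vertex_buffers_call() above.
 * The helper queues a set_vertex_buffers call and hands back its slot array
 * so the caller can fill the pipe_vertex_buffer entries in place (the caller
 * shown here is hypothetical; the only hard requirement visible in this file
 * is that the entries must not be user buffers, per the assert in
 * tc_call_set_vertex_buffers()):
 *
 *    struct pipe_vertex_buffer *vb =
 *       tc_add_set_vertex_buffers_call(pipe, count);
 *    for (unsigned i = 0; i < count; i++) {
 *       vb[i].is_user_buffer = false;
 *       vb[i].buffer_offset = offsets[i];
 *       vb[i].buffer.resource = buffers[i];   // real GPU buffer
 *    }
 */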
2106 
2107 struct tc_stream_outputs {
2108    struct tc_call_base base;
2109    uint8_t count;
2110    uint8_t output_prim;
2111    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2112    unsigned offsets[PIPE_MAX_SO_BUFFERS];
2113 };
2114 
2115 static uint16_t ALWAYS_INLINE
2116 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2117 {
2118    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2119    unsigned count = p->count;
2120 
2121    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets,
2122                                    p->output_prim);
2123    for (unsigned i = 0; i < count; i++)
2124       tc_drop_so_target_reference(p->targets[i]);
2125 
2126    return call_size(tc_stream_outputs);
2127 }
2128 
2129 static void
2130 tc_set_stream_output_targets(struct pipe_context *_pipe,
2131                              unsigned count,
2132                              struct pipe_stream_output_target **tgs,
2133                              const unsigned *offsets,
2134                              enum mesa_prim output_prim)
2135 {
2136    struct threaded_context *tc = threaded_context(_pipe);
2137    struct tc_stream_outputs *p =
2138       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2139    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2140 
2141    for (unsigned i = 0; i < count; i++) {
2142       p->targets[i] = NULL;
2143       pipe_so_target_reference(&p->targets[i], tgs[i]);
2144       if (tgs[i]) {
2145          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2146          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2147       } else {
2148          tc_unbind_buffer(&tc->streamout_buffers[i]);
2149       }
2150    }
2151    p->count = count;
2152    p->output_prim = output_prim;
2153    memcpy(p->offsets, offsets, count * sizeof(unsigned));
2154 
2155    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2156    if (count)
2157       tc->seen_streamout_buffers = true;
2158 }
2159 
2160 static void
2161 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2162                          unsigned count, struct pipe_surface **resources)
2163 {
2164    struct threaded_context *tc = threaded_context(_pipe);
2165    struct pipe_context *pipe = tc->pipe;
2166 
2167    tc_sync(tc);
2168    pipe->set_compute_resources(pipe, start, count, resources);
2169 }
2170 
2171 static void
2172 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2173                       unsigned count, struct pipe_resource **resources,
2174                       uint32_t **handles)
2175 {
2176    struct threaded_context *tc = threaded_context(_pipe);
2177    struct pipe_context *pipe = tc->pipe;
2178 
2179    tc_sync(tc);
2180    pipe->set_global_binding(pipe, first, count, resources, handles);
2181 }
2182 
2183 
2184 /********************************************************************
2185  * views
2186  */
2187 
2188 static struct pipe_surface *
2189 tc_create_surface(struct pipe_context *_pipe,
2190                   struct pipe_resource *resource,
2191                   const struct pipe_surface *surf_tmpl)
2192 {
2193    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2194    struct pipe_surface *view =
2195          pipe->create_surface(pipe, resource, surf_tmpl);
2196 
2197    if (view)
2198       view->context = _pipe;
2199    return view;
2200 }
2201 
2202 static void
2203 tc_surface_destroy(struct pipe_context *_pipe,
2204                    struct pipe_surface *surf)
2205 {
2206    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2207 
2208    pipe->surface_destroy(pipe, surf);
2209 }
2210 
2211 static struct pipe_sampler_view *
2212 tc_create_sampler_view(struct pipe_context *_pipe,
2213                        struct pipe_resource *resource,
2214                        const struct pipe_sampler_view *templ)
2215 {
2216    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2217    struct pipe_sampler_view *view =
2218          pipe->create_sampler_view(pipe, resource, templ);
2219 
2220    if (view)
2221       view->context = _pipe;
2222    return view;
2223 }
2224 
2225 static void
2226 tc_sampler_view_destroy(struct pipe_context *_pipe,
2227                         struct pipe_sampler_view *view)
2228 {
2229    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2230 
2231    pipe->sampler_view_destroy(pipe, view);
2232 }
2233 
2234 static struct pipe_stream_output_target *
2235 tc_create_stream_output_target(struct pipe_context *_pipe,
2236                                struct pipe_resource *res,
2237                                unsigned buffer_offset,
2238                                unsigned buffer_size)
2239 {
2240    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2241    struct threaded_resource *tres = threaded_resource(res);
2242    struct pipe_stream_output_target *view;
2243 
2244    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2245                   buffer_offset + buffer_size);
2246 
2247    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2248                                             buffer_size);
2249    if (view)
2250       view->context = _pipe;
2251    return view;
2252 }
2253 
2254 static void
2255 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2256                                 struct pipe_stream_output_target *target)
2257 {
2258    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2259 
2260    pipe->stream_output_target_destroy(pipe, target);
2261 }
2262 
2263 
2264 /********************************************************************
2265  * bindless
2266  */
2267 
2268 static uint64_t
2269 tc_create_texture_handle(struct pipe_context *_pipe,
2270                          struct pipe_sampler_view *view,
2271                          const struct pipe_sampler_state *state)
2272 {
2273    struct threaded_context *tc = threaded_context(_pipe);
2274    struct pipe_context *pipe = tc->pipe;
2275 
2276    tc_sync(tc);
2277    return pipe->create_texture_handle(pipe, view, state);
2278 }
2279 
2280 struct tc_make_texture_handle_resident {
2281    struct tc_call_base base;
2282    bool resident;
2283    uint64_t handle;
2284 };
2285 
2286 static uint16_t ALWAYS_INLINE
2287 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2288 {
2289    struct tc_make_texture_handle_resident *p =
2290       to_call(call, tc_make_texture_handle_resident);
2291 
2292    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2293    return call_size(tc_make_texture_handle_resident);
2294 }
2295 
2296 static void
2297 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2298                                 bool resident)
2299 {
2300    struct threaded_context *tc = threaded_context(_pipe);
2301    struct tc_make_texture_handle_resident *p =
2302       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2303                   tc_make_texture_handle_resident);
2304 
2305    p->handle = handle;
2306    p->resident = resident;
2307 }
2308 
2309 static uint64_t
2310 tc_create_image_handle(struct pipe_context *_pipe,
2311                        const struct pipe_image_view *image)
2312 {
2313    struct threaded_context *tc = threaded_context(_pipe);
2314    struct pipe_context *pipe = tc->pipe;
2315    struct pipe_resource *resource = image->resource;
2316 
2317    if (image->access & PIPE_IMAGE_ACCESS_WRITE &&
2318        resource && resource->target == PIPE_BUFFER) {
2319       struct threaded_resource *tres = threaded_resource(resource);
2320 
2321        /* The CPU storage doesn't support writable buffers. */
2322       tc_buffer_disable_cpu_storage(resource);
2323 
2324       util_range_add(&tres->b, &tres->valid_buffer_range,
2325                      image->u.buf.offset,
2326                      image->u.buf.offset + image->u.buf.size);
2327    }
2328 
2329    tc_sync(tc);
2330    return pipe->create_image_handle(pipe, image);
2331 }
2332 
2333 struct tc_make_image_handle_resident {
2334    struct tc_call_base base;
2335    bool resident;
2336    unsigned access;
2337    uint64_t handle;
2338 };
2339 
2340 static uint16_t ALWAYS_INLINE
2341 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2342 {
2343    struct tc_make_image_handle_resident *p =
2344       to_call(call, tc_make_image_handle_resident);
2345 
2346    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2347    return call_size(tc_make_image_handle_resident);
2348 }
2349 
2350 static void
2351 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2352                               unsigned access, bool resident)
2353 {
2354    struct threaded_context *tc = threaded_context(_pipe);
2355    struct tc_make_image_handle_resident *p =
2356       tc_add_call(tc, TC_CALL_make_image_handle_resident,
2357                   tc_make_image_handle_resident);
2358 
2359    p->handle = handle;
2360    p->access = access;
2361    p->resident = resident;
2362 }
2363 
2364 
2365 /********************************************************************
2366  * transfer
2367  */
2368 
2369 static void
2370 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2371          unsigned flags);
2372 
2373 struct tc_replace_buffer_storage {
2374    struct tc_call_base base;
2375    uint16_t num_rebinds;
2376    uint32_t rebind_mask;
2377    uint32_t delete_buffer_id;
2378    struct pipe_resource *dst;
2379    struct pipe_resource *src;
2380    tc_replace_buffer_storage_func func;
2381 };
2382 
2383 static uint16_t ALWAYS_INLINE
2384 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2385 {
2386    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2387 
2388    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2389 
2390    tc_drop_resource_reference(p->dst);
2391    tc_drop_resource_reference(p->src);
2392    return call_size(tc_replace_buffer_storage);
2393 }
2394 
2395 /* Return true if the buffer has been invalidated or is idle. */
2396 static bool
2397 tc_invalidate_buffer(struct threaded_context *tc,
2398                      struct threaded_resource *tbuf)
2399 {
2400    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2401       /* It's idle, so invalidation would be a no-op; we can still clear
2402        * the valid range because we are technically doing an invalidation,
2403        * just skipping the actual work because it's useless.
2404        *
2405        * If the buffer is bound for write, we can't invalidate the range.
2406        */
2407       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2408          util_range_set_empty(&tbuf->valid_buffer_range);
2409       return true;
2410    }
2411 
2412    struct pipe_screen *screen = tc->base.screen;
2413    struct pipe_resource *new_buf;
2414 
2415    /* Shared, pinned, and sparse buffers can't be reallocated. */
2416    if (tbuf->is_shared ||
2417        tbuf->is_user_ptr ||
2418        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2419       return false;
2420 
2421    assert(tbuf->b.target == PIPE_BUFFER);
2422    tc->bytes_replaced_estimate += tbuf->b.width0;
2423 
2424    if (tc->bytes_replaced_limit && (tc->bytes_replaced_estimate > tc->bytes_replaced_limit)) {
2425       tc_flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
2426    }
2427 
2428    /* Allocate a new one. */
2429    new_buf = screen->resource_create(screen, &tbuf->b);
2430    if (!new_buf)
2431       return false;
2432 
2433    /* Replace the "latest" pointer. */
2434    if (tbuf->latest != &tbuf->b)
2435       pipe_resource_reference(&tbuf->latest, NULL);
2436 
2437    tbuf->latest = new_buf;
2438 
2439    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2440 
2441    /* Enqueue storage replacement of the original buffer. */
2442    struct tc_replace_buffer_storage *p =
2443       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2444                   tc_replace_buffer_storage);
2445 
2446    p->func = tc->replace_buffer_storage;
2447    tc_set_resource_reference(&p->dst, &tbuf->b);
2448    tc_set_resource_reference(&p->src, new_buf);
2449    p->delete_buffer_id = delete_buffer_id;
2450    p->rebind_mask = 0;
2451 
2452    /* Treat the current buffer as the new buffer. */
2453    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2454    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2455                                      threaded_resource(new_buf)->buffer_id_unique,
2456                                      &p->rebind_mask);
2457 
2458    /* If the buffer is not bound for write, clear the valid range. */
2459    if (!bound_for_write)
2460       util_range_set_empty(&tbuf->valid_buffer_range);
2461 
2462    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2463    threaded_resource(new_buf)->buffer_id_unique = 0;
2464 
2465    return true;
2466 }
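/* Editorial note: when the buffer is busy and can be reallocated, the
 * sequence above is roughly: (1) create a fresh resource from the same
 * template, (2) point tbuf->latest at it so subsequent threaded calls use
 * the new storage, (3) queue a replace_buffer_storage call carrying the
 * rebind mask gathered by tc_rebind_buffer() so the driver swaps the
 * original resource's storage at execute time, and (4) move the new
 * buffer's unique id onto the original so TC's binding tracking keeps
 * following it.
 */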
2467 
2468 static unsigned
2469 tc_improve_map_buffer_flags(struct threaded_context *tc,
2470                             struct threaded_resource *tres, unsigned usage,
2471                             unsigned offset, unsigned size)
2472 {
2473    /* Never invalidate inside the driver and never infer "unsynchronized". */
2474    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2475                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2476 
2477    /* Prevent a reentry. */
2478    if (usage & tc_flags)
2479       return usage;
2480 
2481    /* Use the staging upload if it's preferred. */
2482    if (usage & (PIPE_MAP_DISCARD_RANGE |
2483                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2484        !(usage & PIPE_MAP_PERSISTENT) &&
2485        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2486        tc->use_forced_staging_uploads) {
2487       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2488                  PIPE_MAP_UNSYNCHRONIZED);
2489 
2490       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2491    }
2492 
2493    /* Sparse buffers can't be mapped directly and can't be reallocated
2494     * (fully invalidated). That may just be a radeonsi limitation, but
2495     * the threaded context must obey it with radeonsi.
2496     */
2497    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2498       /* We can use DISCARD_RANGE instead of full discard. This is the only
2499        * fast path for sparse buffers that doesn't need thread synchronization.
2500        */
2501       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2502          usage |= PIPE_MAP_DISCARD_RANGE;
2503 
2504       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2505        * The threaded context doesn't do unsynchronized mappings and
2506        * invalidations of sparse buffers, so correct driver behavior won't
2507        * result in incorrect behavior with the threaded context.
2508        */
2509       return usage;
2510    }
2511 
2512    usage |= tc_flags;
2513 
2514    /* Handle CPU reads trivially. */
2515    if (usage & PIPE_MAP_READ) {
2516       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2517          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2518 
2519       /* Drivers aren't allowed to do buffer invalidations. */
2520       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2521    }
2522 
2523    /* See if the buffer range being mapped has never been initialized or
2524     * the buffer is idle, in which case it can be mapped unsynchronized. */
2525    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2526        ((!tres->is_shared &&
2527          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2528         !tc_is_buffer_busy(tc, tres, usage)))
2529       usage |= PIPE_MAP_UNSYNCHRONIZED;
2530 
2531    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2532       /* If discarding the entire valid range, discard the whole resource instead. */
2533       if (usage & PIPE_MAP_DISCARD_RANGE &&
2534           util_ranges_covered(&tres->valid_buffer_range, offset, offset + size))
2535          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2536 
2537       /* Discard the whole resource if needed. */
2538       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2539          if (tc_invalidate_buffer(tc, tres))
2540             usage |= PIPE_MAP_UNSYNCHRONIZED;
2541          else
2542             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2543       }
2544    }
2545 
2546    /* We won't need this flag anymore. */
2547    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2548    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2549 
2550    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2551     * buffers. */
2552    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2553                 PIPE_MAP_PERSISTENT) ||
2554        tres->is_user_ptr)
2555       usage &= ~PIPE_MAP_DISCARD_RANGE;
2556 
2557    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2558    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2559       usage &= ~PIPE_MAP_DISCARD_RANGE;
2560       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2561    }
2562 
2563    return usage;
2564 }
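/* Editorial note: one worked trace through the function above.  Mapping an
 * idle, non-sparse buffer with PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE:
 * the tc_flags are added, the idle check promotes the map to
 * PIPE_MAP_UNSYNCHRONIZED, the DISCARD flags are then dropped (no staging
 * or invalidation is needed), and TC_TRANSFER_MAP_THREADED_UNSYNC is set,
 * so tc_buffer_map() can call the driver without syncing the thread.
 */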
2565 
2566 static void *
2567 tc_buffer_map(struct pipe_context *_pipe,
2568               struct pipe_resource *resource, unsigned level,
2569               unsigned usage, const struct pipe_box *box,
2570               struct pipe_transfer **transfer)
2571 {
2572    struct threaded_context *tc = threaded_context(_pipe);
2573    struct threaded_resource *tres = threaded_resource(resource);
2574    struct pipe_context *pipe = tc->pipe;
2575 
2576    /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage;
2577     * this shouldn't normally be necessary because glthread only uses large buffers.
2578     */
2579    if (usage & PIPE_MAP_THREAD_SAFE)
2580       tc_buffer_disable_cpu_storage(resource);
2581 
2582    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2583 
2584    /* If the CPU storage is enabled, return it directly. */
2585    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2586       /* We can't let resource_copy_region disable the CPU storage. */
2587       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2588 
2589       if (!tres->cpu_storage) {
2590          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2591 
2592          if (tres->cpu_storage && tres->valid_buffer_range.end) {
2593             /* The GPU buffer contains valid data. Copy them to the CPU storage. */
2594             struct pipe_box box2;
2595             struct pipe_transfer *transfer2;
2596 
2597             unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2598             u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2599 
2600             tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2601             tc_set_driver_thread(tc);
2602 
2603             void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2604                                          0, PIPE_MAP_READ, &box2, &transfer2);
2605             memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2606                    ret,
2607                    valid_range_len);
2608             pipe->buffer_unmap(pipe, transfer2);
2609 
2610             tc_clear_driver_thread(tc);
2611          }
2612       }
2613 
2614       if (tres->cpu_storage) {
2615          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2616          ttrans->b.resource = resource;
2617          ttrans->b.usage = usage;
2618          ttrans->b.box = *box;
2619          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2620          ttrans->cpu_storage_mapped = true;
2621          *transfer = &ttrans->b;
2622 
2623          return (uint8_t*)tres->cpu_storage + box->x;
2624       } else {
2625          tres->allow_cpu_storage = false;
2626       }
2627    }
2628 
2629    /* Do a staging transfer within the threaded context. The driver should
2630     * only get resource_copy_region.
2631     */
2632    if (usage & PIPE_MAP_DISCARD_RANGE) {
2633       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2634       uint8_t *map;
2635 
2636       u_upload_alloc(tc->base.stream_uploader, 0,
2637                      box->width + (box->x % tc->map_buffer_alignment),
2638                      tc->map_buffer_alignment, &ttrans->b.offset,
2639                      &ttrans->staging, (void**)&map);
2640       if (!map) {
2641          slab_free(&tc->pool_transfers, ttrans);
2642          return NULL;
2643       }
2644 
2645       ttrans->b.resource = resource;
2646       ttrans->b.level = 0;
2647       ttrans->b.usage = usage;
2648       ttrans->b.box = *box;
2649       ttrans->b.stride = 0;
2650       ttrans->b.layer_stride = 0;
2651       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2652       ttrans->cpu_storage_mapped = false;
2653       *transfer = &ttrans->b;
2654 
2655       p_atomic_inc(&tres->pending_staging_uploads);
2656       util_range_add(resource, &tres->pending_staging_uploads_range,
2657                      box->x, box->x + box->width);
2658 
2659       return map + (box->x % tc->map_buffer_alignment);
2660    }
2661 
2662    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2663        p_atomic_read(&tres->pending_staging_uploads) &&
2664        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2665       /* Write conflict detected between a staging transfer and the direct mapping we're
2666        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2667        * will have to wait for the staging transfer completion.
2668        * Note: The conflict detection is only based on the mapped range, not on the actual
2669        * written range(s).
2670        */
2671       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2672       tc->use_forced_staging_uploads = false;
2673    }
2674 
2675    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2676    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2677       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2678                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2679       tc_set_driver_thread(tc);
2680    }
2681 
2682    tc->bytes_mapped_estimate += box->width;
2683 
2684    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2685                                 level, usage, box, transfer);
2686    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2687    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2688 
2689    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2690       tc_clear_driver_thread(tc);
2691 
2692    return ret;
2693 }
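/* Editorial note on the CPU-storage path above: the first map of a buffer
 * with allow_cpu_storage copies whatever the GPU copy already holds (its
 * valid_buffer_range) into a malloc'ed shadow, and later maps are served
 * from that shadow with no driver call and no thread sync.  The shadow is
 * pushed back to the driver later through the
 * TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE path; that unmap/upload side is
 * outside this excerpt.
 */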
2694 
2695 static void *
2696 tc_texture_map(struct pipe_context *_pipe,
2697                struct pipe_resource *resource, unsigned level,
2698                unsigned usage, const struct pipe_box *box,
2699                struct pipe_transfer **transfer)
2700 {
2701    struct threaded_context *tc = threaded_context(_pipe);
2702    struct threaded_resource *tres = threaded_resource(resource);
2703    struct pipe_context *pipe = tc->pipe;
2704 
2705    tc_sync_msg(tc, "texture");
2706    tc_set_driver_thread(tc);
2707    /* block all unsync texture subdata during map */
2708    tc_set_resource_batch_usage_persistent(tc, resource, true);
2709 
2710    tc->bytes_mapped_estimate += box->width;
2711 
2712    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2713                                  level, usage, box, transfer);
2714 
2715    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2716       tc_clear_driver_thread(tc);
2717 
2718    return ret;
2719 }
2720 
2721 struct tc_transfer_flush_region {
2722    struct tc_call_base base;
2723    struct pipe_box box;
2724    struct pipe_transfer *transfer;
2725 };
2726 
2727 static uint16_t ALWAYS_INLINE
2728 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2729 {
2730    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2731 
2732    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2733    return call_size(tc_transfer_flush_region);
2734 }
2735 
2736 struct tc_resource_copy_region {
2737    struct tc_call_base base;
2738    unsigned dst_level;
2739    unsigned dstx, dsty, dstz;
2740    unsigned src_level;
2741    struct pipe_box src_box;
2742    struct pipe_resource *dst;
2743    struct pipe_resource *src;
2744 };
2745 
2746 static void
2747 tc_resource_copy_region(struct pipe_context *_pipe,
2748                         struct pipe_resource *dst, unsigned dst_level,
2749                         unsigned dstx, unsigned dsty, unsigned dstz,
2750                         struct pipe_resource *src, unsigned src_level,
2751                         const struct pipe_box *src_box);
2752 
2753 static void
2754 tc_buffer_do_flush_region(struct threaded_context *tc,
2755                           struct threaded_transfer *ttrans,
2756                           const struct pipe_box *box)
2757 {
2758    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2759 
2760    if (ttrans->staging) {
2761       struct pipe_box src_box;
2762 
2763       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2764                (box->x - ttrans->b.box.x),
2765                box->width, &src_box);
2766 
2767       /* Copy the staging buffer into the original one. */
2768       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2769                               ttrans->staging, 0, &src_box);
2770    }
2771 
2772    /* Don't update the valid range when we're uploading the CPU storage
2773     * because it includes the uninitialized range too.
2774     */
2775    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2776       util_range_add(&tres->b, ttrans->valid_buffer_range,
2777                      box->x, box->x + box->width);
2778    }
2779 }
2780 
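/* Example of the source-box math above (illustrative values only): if the
 * original map covered box.x = 70 with map_buffer_alignment = 64 and the
 * application later flushes a sub-range starting at box->x = 100, the copy
 * reads from the staging buffer at
 *
 *    ttrans->b.offset + (70 % 64) + (100 - 70)
 *
 * i.e. the position where those bytes were written through the mapping, and
 * tc_resource_copy_region() writes them back to offset 100 of the real
 * buffer.
 */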
2781 static void
2782 tc_transfer_flush_region(struct pipe_context *_pipe,
2783                          struct pipe_transfer *transfer,
2784                          const struct pipe_box *rel_box)
2785 {
2786    struct threaded_context *tc = threaded_context(_pipe);
2787    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2788    struct threaded_resource *tres = threaded_resource(transfer->resource);
2789    unsigned required_usage = PIPE_MAP_WRITE |
2790                              PIPE_MAP_FLUSH_EXPLICIT;
2791 
2792    if (tres->b.target == PIPE_BUFFER) {
2793       if ((transfer->usage & required_usage) == required_usage) {
2794          struct pipe_box box;
2795 
2796          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2797          tc_buffer_do_flush_region(tc, ttrans, &box);
2798       }
2799 
2800       /* Staging transfers don't send the call to the driver.
2801        *
2802        * Transfers using the CPU storage shouldn't call transfer_flush_region
2803        * in the driver because the buffer is not really mapped on the driver
2804        * side and the CPU storage always re-uploads everything (flush_region
2805        * makes no difference).
2806        */
2807       if (ttrans->staging || ttrans->cpu_storage_mapped)
2808          return;
2809    }
2810 
2811    struct tc_transfer_flush_region *p =
2812       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2813    p->transfer = transfer;
2814    p->box = *rel_box;
2815 }
2816 
2817 struct tc_buffer_unmap {
2818    struct tc_call_base base;
2819    bool was_staging_transfer;
2820    union {
2821       struct pipe_transfer *transfer;
2822       struct pipe_resource *resource;
2823    };
2824 };
2825 
2826 static uint16_t ALWAYS_INLINE
2827 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2828 {
2829    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2830 
2831    if (p->was_staging_transfer) {
2832       struct threaded_resource *tres = threaded_resource(p->resource);
2833       /* Nothing to do except keeping track of staging uploads */
2834       assert(tres->pending_staging_uploads > 0);
2835       p_atomic_dec(&tres->pending_staging_uploads);
2836       tc_drop_resource_reference(p->resource);
2837    } else {
2838       pipe->buffer_unmap(pipe, p->transfer);
2839    }
2840 
2841    return call_size(tc_buffer_unmap);
2842 }
2843 
2844 static void
2845 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2846 {
2847    struct threaded_context *tc = threaded_context(_pipe);
2848    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2849    struct threaded_resource *tres = threaded_resource(transfer->resource);
2850 
2851    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2852     * called from any thread and bypasses all multithreaded queues.
2853     */
2854    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2855       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2856       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2857                                   PIPE_MAP_DISCARD_RANGE)));
2858 
2859       struct pipe_context *pipe = tc->pipe;
2860       util_range_add(&tres->b, ttrans->valid_buffer_range,
2861                       transfer->box.x, transfer->box.x + transfer->box.width);
2862 
2863       pipe->buffer_unmap(pipe, transfer);
2864       return;
2865    }
2866 
2867    if (transfer->usage & PIPE_MAP_WRITE &&
2868        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2869       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2870 
2871    if (ttrans->cpu_storage_mapped) {
2872       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2873        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2874        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2875        * a crash.
2876        *
2877        * Disallow the CPU storage in the driver to work around this.
2878        */
2879       assert(tres->cpu_storage);
2880 
2881       if (tres->cpu_storage) {
2882          tc_invalidate_buffer(tc, tres);
2883          tc_buffer_subdata(&tc->base, &tres->b,
2884                            PIPE_MAP_UNSYNCHRONIZED |
2885                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2886                            0, tres->b.width0, tres->cpu_storage);
2887          /* This shouldn't have been freed by buffer_subdata. */
2888          assert(tres->cpu_storage);
2889       } else {
2890          static bool warned_once = false;
2891          if (!warned_once) {
2892             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2893             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2894             warned_once = true;
2895          }
2896       }
2897 
2898       tc_drop_resource_reference(ttrans->staging);
2899       slab_free(&tc->pool_transfers, ttrans);
2900       return;
2901    }
2902 
2903    bool was_staging_transfer = false;
2904 
2905    if (ttrans->staging) {
2906       was_staging_transfer = true;
2907 
2908       tc_drop_resource_reference(ttrans->staging);
2909       slab_free(&tc->pool_transfers, ttrans);
2910    }
2911 
2912    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2913                                            tc_buffer_unmap);
2914    if (was_staging_transfer) {
2915       tc_set_resource_reference(&p->resource, &tres->b);
2916       p->was_staging_transfer = true;
2917    } else {
2918       p->transfer = transfer;
2919       p->was_staging_transfer = false;
2920    }
2921 
2922    /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
2923     * defers the unmap operation to the batch execution.
2924     * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
2925     * if it exceeds an optional limit, the current batch is flushed
2926     * to reclaim some RAM. */
2927    if (!ttrans->staging && tc->bytes_mapped_limit &&
2928        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2929       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2930    }
2931 }
2932 
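/* Illustration of the bytes_mapped limit above (made-up number): with
 * tc->bytes_mapped_limit set to, say, 256 MB, a long stream of mapped
 * buffers keeps increasing bytes_mapped_estimate while their driver-side
 * unmaps sit queued in the batch; once the estimate crosses the limit, the
 * asynchronous flush above lets those queued unmaps execute so the driver
 * can release the mappings instead of letting them pile up.
 */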
2933 struct tc_texture_unmap {
2934    struct tc_call_base base;
2935    struct pipe_transfer *transfer;
2936 };
2937 
2938 static uint16_t ALWAYS_INLINE
2939 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
2940 {
2941    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2942 
2943    pipe->texture_unmap(pipe, p->transfer);
2944    return call_size(tc_texture_unmap);
2945 }
2946 
2947 static void
2948 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2949 {
2950    struct threaded_context *tc = threaded_context(_pipe);
2951    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2952 
2953    /* enable subdata again once resource is no longer mapped */
2954    tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
2955 
2956    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2957 
2958    /* tc_texture_map directly maps the textures, but tc_texture_unmap
2959     * defers the unmap operation to the batch execution.
2960     * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
2961     * if it exceeds an optional limit, the current batch is flushed
2962     * to reclaim some RAM. */
2963    if (!ttrans->staging && tc->bytes_mapped_limit &&
2964        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2965       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2966    }
2967 }
2968 
2969 struct tc_buffer_subdata {
2970    struct tc_call_base base;
2971    unsigned usage, offset, size;
2972    struct pipe_resource *resource;
2973    char slot[0]; /* more will be allocated if needed */
2974 };
2975 
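/* The zero-length "slot" array above is a variable-sized payload: the bytes
 * passed to buffer_subdata are copied directly behind the call header and
 * tc_add_slot_based_call() reserves enough batch slots to cover both.  A
 * rough sketch of the resulting layout in the batch (field sizes are
 * illustrative, not exact):
 *
 *    [ tc_call_base | usage, offset, size | resource | payload bytes ... ]
 *    '------------ base.num_slots * sizeof(struct tc_call_base) ---------'
 *
 * tc_call_buffer_subdata() below simply replays p->slot into the driver and
 * then drops the resource reference.
 */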
2976 static uint16_t ALWAYS_INLINE
2977 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
2978 {
2979    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2980 
2981    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2982                         p->slot);
2983    tc_drop_resource_reference(p->resource);
2984    return p->base.num_slots;
2985 }
2986 
2987 static bool
2988 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
2989                             unsigned usage, unsigned offset,
2990                             struct pipe_resource *resource)
2991 {
2992    if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
2993       return false;
2994 
2995    struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
2996 
2997    return subdata->usage == usage && subdata->resource == resource
2998           && (subdata->offset + subdata->size) == offset;
2999 }
3000 
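/* Example of what the check above allows to merge (hypothetical sizes): an
 * application uploading a buffer piecewise as
 *
 *    buffer_subdata(res, usage, offset = 0,   size = 256, data)
 *    buffer_subdata(res, usage, offset = 256, size = 256, data + 256)
 *
 * produces a second call whose offset equals the previous offset + size
 * with the same usage and resource, so tc_buffer_subdata() below enlarges
 * the previous call and appends the new bytes instead of enqueuing another
 * call.
 */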
3001 static void
3002 tc_buffer_subdata(struct pipe_context *_pipe,
3003                   struct pipe_resource *resource,
3004                   unsigned usage, unsigned offset,
3005                   unsigned size, const void *data)
3006 {
3007    struct threaded_context *tc = threaded_context(_pipe);
3008    struct threaded_resource *tres = threaded_resource(resource);
3009 
3010    if (!size)
3011       return;
3012 
3013    usage |= PIPE_MAP_WRITE;
3014 
3015    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
3016    if (!(usage & PIPE_MAP_DIRECTLY))
3017       usage |= PIPE_MAP_DISCARD_RANGE;
3018 
3019    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3020 
3021    /* Unsynchronized and big transfers should use transfer_map. Also handle
3022     * full invalidations, because drivers aren't allowed to do them.
3023     */
3024    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3025                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3026        size > TC_MAX_SUBDATA_BYTES ||
3027        tres->cpu_storage) {
3028       struct pipe_transfer *transfer;
3029       struct pipe_box box;
3030       uint8_t *map = NULL;
3031 
3032       u_box_1d(offset, size, &box);
3033 
3034       /* CPU storage is only useful for partial updates. It can add overhead
3035        * on glBufferData calls so avoid using it.
3036        */
3037       if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3038          usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3039 
3040       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3041       if (map) {
3042          memcpy(map, data, size);
3043          tc_buffer_unmap(_pipe, transfer);
3044       }
3045       return;
3046    }
3047 
3048    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3049 
3050    /* We can potentially merge this subdata call with the previous one (if any),
3051     * if the application does a whole-buffer upload piecewise. */
3052    {
3053       struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3054       struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3055 
3056       if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3057          tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3058          memcpy(merge_dest->slot + merge_dest->size, data, size);
3059          merge_dest->size += size;
3060 
3061          /* TODO: We *could* do an invalidate + upload here if we detect that
3062           * the merged subdata call overwrites the entire buffer. However, that's
3063           * a little complicated since we can't add further calls to our batch
3064           * until we have removed the merged subdata call, which means that
3065           * calling tc_invalidate_buffer before we have removed the call will
3066           * blow things up.
3067           *
3068           * Just leave a large, merged subdata call in the batch for now, which is
3069           * at least better than tons of tiny subdata calls.
3070           */
3071 
3072          return;
3073       }
3074    }
3075 
3076    /* The upload is small. Enqueue it. */
3077    struct tc_buffer_subdata *p =
3078       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3079 
3080    tc_set_resource_reference(&p->resource, resource);
3081    /* This will always be busy because if it wasn't, tc_improve_map_buffer_flags
3082     * would set UNSYNCHRONIZED and we wouldn't get here.
3083     */
3084    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3085    p->usage = usage;
3086    p->offset = offset;
3087    p->size = size;
3088    memcpy(p->slot, data, size);
3089 
3090    tc_mark_call_mergeable(tc, &p->base);
3091 }
3092 
3093 struct tc_texture_subdata {
3094    struct tc_call_base base;
3095    unsigned level, usage, stride;
3096    struct pipe_box box;
3097    struct pipe_resource *resource;
3098    uintptr_t layer_stride;
3099    char slot[0]; /* more will be allocated if needed */
3100 };
3101 
3102 static uint16_t ALWAYS_INLINE
3103 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3104 {
3105    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3106 
3107    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3108                          p->slot, p->stride, p->layer_stride);
3109    tc_drop_resource_reference(p->resource);
3110    return p->base.num_slots;
3111 }
3112 
3113 static void
3114 tc_texture_subdata(struct pipe_context *_pipe,
3115                    struct pipe_resource *resource,
3116                    unsigned level, unsigned usage,
3117                    const struct pipe_box *box,
3118                    const void *data, unsigned stride,
3119                    uintptr_t layer_stride)
3120 {
3121    struct threaded_context *tc = threaded_context(_pipe);
3122    uint64_t size;
3123 
3124    assert(box->height >= 1);
3125    assert(box->depth >= 1);
3126 
3127    size = (box->depth - 1) * layer_stride +
3128           (box->height - 1) * (uint64_t)stride +
3129           box->width * util_format_get_blocksize(resource->format);
3130    if (!size)
3131       return;
3132 
3133    /* Small uploads can be enqueued, big uploads must sync. */
3134    if (size <= TC_MAX_SUBDATA_BYTES) {
3135       struct tc_texture_subdata *p =
3136          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3137 
3138       tc_set_resource_batch_usage(tc, resource);
3139       tc_set_resource_reference(&p->resource, resource);
3140       p->level = level;
3141       p->usage = usage;
3142       p->box = *box;
3143       p->stride = stride;
3144       p->layer_stride = layer_stride;
3145       memcpy(p->slot, data, size);
3146    } else {
3147       struct pipe_context *pipe = tc->pipe;
3148       struct threaded_resource *tres = threaded_resource(resource);
3149       unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3150       bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3151                         tc->options.is_resource_busy &&
3152                         !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3153 
3154       if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3155           tc->options.parse_renderpass_info && tc->in_renderpass) {
3156          enum pipe_format format = resource->format;
3157          if (usage & PIPE_MAP_DEPTH_ONLY)
3158             format = util_format_get_depth_only(format);
3159          else if (usage & PIPE_MAP_STENCIL_ONLY)
3160             format = PIPE_FORMAT_S8_UINT;
3161 
3162          unsigned fmt_stride = util_format_get_stride(format, box->width);
3163          uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3164          assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3165 
3166          struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3167          pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3168          struct pipe_box src_box = *box;
3169          src_box.x = src_box.y = src_box.z = 0;
3170 
3171          if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3172             /* if stride matches, a single copy is fine */
3173             tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3174          } else {
3175             /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */
3176             src_box.depth = 1;
3177             for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3178                unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3179                int blocksize = util_format_get_blocksize(format);
3180                int blockwidth = util_format_get_blockwidth(format);
3181                int blockheight = util_format_get_blockheight(format);
3182 
3183                assert(blocksize > 0);
3184                assert(blockwidth > 0);
3185                assert(blockheight > 0);
3186 
3187                dst_x /= blockwidth;
3188                dst_y /= blockheight;
3189                width = DIV_ROUND_UP(width, blockwidth);
3190                height = DIV_ROUND_UP(height, blockheight);
3191 
3192                width *= blocksize;
3193 
3194                if (width == fmt_stride && width == (unsigned)stride) {
3195                   ASSERTED uint64_t size = (uint64_t)height * width;
3196 
3197                   assert(size <= SIZE_MAX);
3198                   assert(dst_x + src_box.width < u_minify(pres->width0, level));
3199                   assert(dst_y + src_box.height < u_minify(pres->height0, level));
3200                   assert(pres->target != PIPE_TEXTURE_3D ||  z + src_box.depth < u_minify(pres->depth0, level));
3201                   tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3202                } else {
3203                   src_box.height = 1;
3204                   for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3205                      tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3206                }
3207             }
3208          }
3209 
3210          pipe_resource_reference(&pres, NULL);
3211       } else {
3212          if (can_unsync) {
3213             usage |= unsync_usage;
3214          } else {
3215             tc_sync(tc);
3216             tc_set_driver_thread(tc);
3217          }
3218          pipe->texture_subdata(pipe, resource, level, usage, box, data,
3219                               stride, layer_stride);
3220          if (!can_unsync)
3221             tc_clear_driver_thread(tc);
3222       }
3223    }
3224 }
3225 
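/* Worked example of the size computation in tc_texture_subdata() above
 * (made-up values): uploading a 16x16x1 box of a 4-byte format with
 * stride = 64 gives
 *
 *    size = (1 - 1) * layer_stride + (16 - 1) * 64 + 16 * 4 = 1024 bytes
 *
 * Whether that fits within TC_MAX_SUBDATA_BYTES decides between enqueuing
 * the upload in the batch and taking one of the synchronizing or
 * staging-copy paths above.
 */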
3226 
3227 /********************************************************************
3228  * miscellaneous
3229  */
3230 
3231 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3232    static ret_type \
3233    tc_##func(struct pipe_context *_pipe) \
3234    { \
3235       struct threaded_context *tc = threaded_context(_pipe); \
3236       struct pipe_context *pipe = tc->pipe; \
3237       tc_sync(tc); \
3238       return pipe->func(pipe); \
3239    }
3240 
3241 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
3242 
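/* For reference, TC_FUNC_SYNC_RET0(uint64_t, get_timestamp) above expands
 * to roughly the following (sketch with whitespace adjusted):
 *
 *    static uint64_t
 *    tc_get_timestamp(struct pipe_context *_pipe)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct pipe_context *pipe = tc->pipe;
 *       tc_sync(tc);
 *       return pipe->get_timestamp(pipe);
 *    }
 */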
3243 static void
3244 tc_get_sample_position(struct pipe_context *_pipe,
3245                        unsigned sample_count, unsigned sample_index,
3246                        float *out_value)
3247 {
3248    struct threaded_context *tc = threaded_context(_pipe);
3249    struct pipe_context *pipe = tc->pipe;
3250 
3251    pipe->get_sample_position(pipe, sample_count, sample_index,
3252                              out_value);
3253 }
3254 
3255 static enum pipe_reset_status
3256 tc_get_device_reset_status(struct pipe_context *_pipe)
3257 {
3258    struct threaded_context *tc = threaded_context(_pipe);
3259    struct pipe_context *pipe = tc->pipe;
3260 
3261    if (!tc->options.unsynchronized_get_device_reset_status)
3262       tc_sync(tc);
3263 
3264    return pipe->get_device_reset_status(pipe);
3265 }
3266 
3267 static void
3268 tc_set_device_reset_callback(struct pipe_context *_pipe,
3269                              const struct pipe_device_reset_callback *cb)
3270 {
3271    struct threaded_context *tc = threaded_context(_pipe);
3272    struct pipe_context *pipe = tc->pipe;
3273 
3274    tc_sync(tc);
3275    pipe->set_device_reset_callback(pipe, cb);
3276 }
3277 
3278 struct tc_string_marker {
3279    struct tc_call_base base;
3280    int len;
3281    char slot[0]; /* more will be allocated if needed */
3282 };
3283 
3284 static uint16_t ALWAYS_INLINE
3285 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3286 {
3287    struct tc_string_marker *p = (struct tc_string_marker *)call;
3288    pipe->emit_string_marker(pipe, p->slot, p->len);
3289    return p->base.num_slots;
3290 }
3291 
3292 static void
3293 tc_emit_string_marker(struct pipe_context *_pipe,
3294                       const char *string, int len)
3295 {
3296    struct threaded_context *tc = threaded_context(_pipe);
3297 
3298    if (len <= TC_MAX_STRING_MARKER_BYTES) {
3299       struct tc_string_marker *p =
3300          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3301 
3302       memcpy(p->slot, string, len);
3303       p->len = len;
3304    } else {
3305       struct pipe_context *pipe = tc->pipe;
3306 
3307       tc_sync(tc);
3308       tc_set_driver_thread(tc);
3309       pipe->emit_string_marker(pipe, string, len);
3310       tc_clear_driver_thread(tc);
3311    }
3312 }
3313 
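/* Example (illustrative): a short marker such as "frame 42 begin" fits in
 * TC_MAX_STRING_MARKER_BYTES and is copied into the batch, so the calling
 * thread never blocks; a marker longer than that limit is sent through the
 * synchronizing path instead of being stored in batch slots.
 */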
3314 static void
3315 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3316                     unsigned flags)
3317 {
3318    struct threaded_context *tc = threaded_context(_pipe);
3319    struct pipe_context *pipe = tc->pipe;
3320 
3321    tc_sync(tc);
3322    pipe->dump_debug_state(pipe, stream, flags);
3323 }
3324 
3325 static void
3326 tc_set_debug_callback(struct pipe_context *_pipe,
3327                       const struct util_debug_callback *cb)
3328 {
3329    struct threaded_context *tc = threaded_context(_pipe);
3330    struct pipe_context *pipe = tc->pipe;
3331 
3332    tc_sync(tc);
3333 
3334    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3335     * with this. shader-db will use an environment variable to disable
3336     * the threaded context.
3337     */
3338    if (cb && !cb->async)
3339       pipe->set_debug_callback(pipe, NULL);
3340    else
3341       pipe->set_debug_callback(pipe, cb);
3342 }
3343 
3344 static void
3345 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3346 {
3347    struct threaded_context *tc = threaded_context(_pipe);
3348    struct pipe_context *pipe = tc->pipe;
3349 
3350    tc_sync(tc);
3351    pipe->set_log_context(pipe, log);
3352 }
3353 
3354 static void
3355 tc_create_fence_fd(struct pipe_context *_pipe,
3356                    struct pipe_fence_handle **fence, int fd,
3357                    enum pipe_fd_type type)
3358 {
3359    struct threaded_context *tc = threaded_context(_pipe);
3360    struct pipe_context *pipe = tc->pipe;
3361 
3362    if (!tc->options.unsynchronized_create_fence_fd)
3363       tc_sync(tc);
3364 
3365    pipe->create_fence_fd(pipe, fence, fd, type);
3366 }
3367 
3368 struct tc_fence_call {
3369    struct tc_call_base base;
3370    struct pipe_fence_handle *fence;
3371 };
3372 
3373 static uint16_t ALWAYS_INLINE
3374 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3375 {
3376    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3377 
3378    pipe->fence_server_sync(pipe, fence);
3379    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3380    return call_size(tc_fence_call);
3381 }
3382 
3383 static void
3384 tc_fence_server_sync(struct pipe_context *_pipe,
3385                      struct pipe_fence_handle *fence)
3386 {
3387    struct threaded_context *tc = threaded_context(_pipe);
3388    struct pipe_screen *screen = tc->pipe->screen;
3389    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3390                                             tc_fence_call);
3391 
3392    call->fence = NULL;
3393    screen->fence_reference(screen, &call->fence, fence);
3394 }
3395 
3396 static void
3397 tc_fence_server_signal(struct pipe_context *_pipe,
3398                            struct pipe_fence_handle *fence)
3399 {
3400    struct threaded_context *tc = threaded_context(_pipe);
3401    struct pipe_context *pipe = tc->pipe;
3402    tc_sync(tc);
3403    pipe->fence_server_signal(pipe, fence);
3404 }
3405 
3406 static struct pipe_video_codec *
3407 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3408                       UNUSED const struct pipe_video_codec *templ)
3409 {
3410    unreachable("Threaded context should not be enabled for video APIs");
3411    return NULL;
3412 }
3413 
3414 static struct pipe_video_buffer *
3415 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3416                        UNUSED const struct pipe_video_buffer *templ)
3417 {
3418    unreachable("Threaded context should not be enabled for video APIs");
3419    return NULL;
3420 }
3421 
3422 struct tc_context_param {
3423    struct tc_call_base base;
3424    enum pipe_context_param param;
3425    unsigned value;
3426 };
3427 
3428 static uint16_t ALWAYS_INLINE
3429 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3430 {
3431    struct tc_context_param *p = to_call(call, tc_context_param);
3432 
3433    if (pipe->set_context_param)
3434       pipe->set_context_param(pipe, p->param, p->value);
3435 
3436    return call_size(tc_context_param);
3437 }
3438 
3439 static void
3440 tc_set_context_param(struct pipe_context *_pipe,
3441                            enum pipe_context_param param,
3442                            unsigned value)
3443 {
3444    struct threaded_context *tc = threaded_context(_pipe);
3445 
3446    if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3447       util_thread_sched_apply_policy(tc->queue.threads[0],
3448                                      UTIL_THREAD_THREADED_CONTEXT, value,
3449                                      NULL);
3450 
3451       /* Execute this immediately (without enqueuing).
3452        * It's required to be thread-safe.
3453        */
3454       struct pipe_context *pipe = tc->pipe;
3455       if (pipe->set_context_param)
3456          pipe->set_context_param(pipe, param, value);
3457       return;
3458    }
3459 
3460    if (tc->pipe->set_context_param) {
3461       struct tc_context_param *call =
3462          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3463 
3464       call->param = param;
3465       call->value = value;
3466    }
3467 }
3468 
3469 
3470 /********************************************************************
3471  * draw, launch, clear, blit, copy, flush
3472  */
3473 
3474 struct tc_flush_deferred_call {
3475    struct tc_call_base base;
3476    unsigned flags;
3477    struct pipe_fence_handle *fence;
3478 };
3479 
3480 struct tc_flush_call {
3481    struct tc_call_base base;
3482    unsigned flags;
3483    struct pipe_fence_handle *fence;
3484    struct threaded_context *tc;
3485 };
3486 
3487 static void
3488 tc_flush_queries(struct threaded_context *tc)
3489 {
3490    struct threaded_query *tq, *tmp;
3491    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3492       list_del(&tq->head_unflushed);
3493 
3494       /* Memory release semantics: due to a possible race with
3495        * tc_get_query_result, we must ensure that the linked list changes
3496        * are visible before setting tq->flushed.
3497        */
3498       p_atomic_set(&tq->flushed, true);
3499    }
3500 }
3501 
3502 static uint16_t ALWAYS_INLINE
3503 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3504 {
3505    struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3506    struct pipe_screen *screen = pipe->screen;
3507 
3508    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3509    screen->fence_reference(screen, &p->fence, NULL);
3510 
3511    return call_size(tc_flush_deferred_call);
3512 }
3513 
3514 static uint16_t ALWAYS_INLINE
3515 tc_call_flush(struct pipe_context *pipe, void *call)
3516 {
3517    struct tc_flush_call *p = to_call(call, tc_flush_call);
3518    struct pipe_screen *screen = pipe->screen;
3519 
3520    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3521    screen->fence_reference(screen, &p->fence, NULL);
3522 
3523    tc_flush_queries(p->tc);
3524 
3525    return call_size(tc_flush_call);
3526 }
3527 
3528 static void
3529 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3530          unsigned flags)
3531 {
3532    struct threaded_context *tc = threaded_context(_pipe);
3533    struct pipe_context *pipe = tc->pipe;
3534    struct pipe_screen *screen = pipe->screen;
3535    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3536    bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
3537 
3538    if (!deferred || !fence)
3539       tc->in_renderpass = false;
3540 
3541    if (async && tc->options.create_fence) {
3542       if (fence) {
3543          struct tc_batch *next = &tc->batch_slots[tc->next];
3544 
3545          if (!next->token) {
3546             next->token = malloc(sizeof(*next->token));
3547             if (!next->token)
3548                goto out_of_memory;
3549 
3550             pipe_reference_init(&next->token->ref, 1);
3551             next->token->tc = tc;
3552          }
3553 
3554          screen->fence_reference(screen, fence,
3555                                  tc->options.create_fence(pipe, next->token));
3556          if (!*fence)
3557             goto out_of_memory;
3558       }
3559 
3560       struct tc_flush_call *p;
3561       if (deferred) {
3562          /* these have identical fields */
3563          p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3564       } else {
3565          p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3566          p->tc = tc;
3567       }
3568       p->fence = fence ? *fence : NULL;
3569       p->flags = flags | TC_FLUSH_ASYNC;
3570 
3571       if (!deferred) {
3572          /* non-deferred async flushes indicate completion of existing renderpass info */
3573          tc_signal_renderpass_info_ready(tc);
3574          tc_batch_flush(tc, false);
3575          tc->seen_fb_state = false;
3576       }
3577 
3578       return;
3579    }
3580 
3581 out_of_memory:
3582    tc->flushing = true;
3583    /* renderpass info is signaled during sync */
3584    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3585                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3586 
3587    if (!deferred) {
3588       tc_flush_queries(tc);
3589       tc->seen_fb_state = false;
3590       tc->query_ended = false;
3591    }
3592    tc_set_driver_thread(tc);
3593    pipe->flush(pipe, fence, flags);
3594    tc_clear_driver_thread(tc);
3595    tc->flushing = false;
3596 }
3597 
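/* Typical ways tc_flush() above is reached from the state tracker
 * (illustrative, not exhaustive):
 *
 *    pipe->flush(pipe, &fence, PIPE_FLUSH_DEFERRED); // get a fence, keep batching
 *    pipe->flush(pipe, &fence, PIPE_FLUSH_ASYNC);    // kick the batch, don't wait
 *    pipe->flush(pipe, NULL, 0);                     // fully synchronizing flush
 *
 * The first two stay asynchronous only when the driver provides
 * tc->options.create_fence; otherwise (and for synchronous flushes) the
 * function falls through to the tc_sync_msg() path above.
 */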
3598 struct tc_draw_single_drawid {
3599    struct tc_draw_single base;
3600    unsigned drawid_offset;
3601 };
3602 
3603 static uint16_t ALWAYS_INLINE
3604 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3605 {
3606    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3607    struct tc_draw_single *info = &info_drawid->base;
3608 
3609    /* u_threaded_context stores start/count in min/max_index for single draws. */
3610    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3611    struct pipe_draw_start_count_bias draw;
3612 
3613    draw.start = info->info.min_index;
3614    draw.count = info->info.max_index;
3615    draw.index_bias = info->index_bias;
3616 
3617    info->info.index_bounds_valid = false;
3618    info->info.has_user_indices = false;
3619    info->info.take_index_buffer_ownership = false;
3620 
3621    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3622    if (info->info.index_size)
3623       tc_drop_resource_reference(info->info.index.resource);
3624 
3625    return call_size(tc_draw_single_drawid);
3626 }
3627 
3628 static void
3629 simplify_draw_info(struct pipe_draw_info *info)
3630 {
3631    /* Clear these fields to facilitate draw merging.
3632     * Drivers shouldn't use them.
3633     */
3634    info->has_user_indices = false;
3635    info->index_bounds_valid = false;
3636    info->take_index_buffer_ownership = false;
3637    info->index_bias_varies = false;
3638    info->_pad = 0;
3639 
3640    /* This shouldn't be set when merging single draws. */
3641    info->increment_draw_id = false;
3642 
3643    if (info->index_size) {
3644       if (!info->primitive_restart)
3645          info->restart_index = 0;
3646    } else {
3647       assert(!info->primitive_restart);
3648       info->primitive_restart = false;
3649       info->restart_index = 0;
3650       info->index.resource = NULL;
3651    }
3652 }
3653 
3654 static bool
3655 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3656                               struct tc_draw_single *next)
3657 {
3658    if (next->base.call_id != TC_CALL_draw_single)
3659       return false;
3660 
3661    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3662                  sizeof(struct pipe_draw_info) - 8);
3663    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3664                  sizeof(struct pipe_draw_info) - 4);
3665    /* All fields must be the same except start and count. */
3666    /* u_threaded_context stores start/count in min/max_index for single draws. */
3667    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3668                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3669 }
3670 
3671 static uint16_t ALWAYS_INLINE
3672 tc_call_draw_single(struct pipe_context *pipe, void *call)
3673 {
3674    /* Draw call merging. */
3675    struct tc_draw_single *first = to_call(call, tc_draw_single);
3676    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3677 
3678    /* If at least 2 consecutive draw calls can be merged... */
3679    if (next->base.call_id == TC_CALL_draw_single) {
3680       if (is_next_call_a_mergeable_draw(first, next)) {
3681          /* The maximum number of merged draws is given by the batch size. */
3682          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3683          unsigned num_draws = 2;
3684          bool index_bias_varies = first->index_bias != next->index_bias;
3685 
3686          /* u_threaded_context stores start/count in min/max_index for single draws. */
3687          multi[0].start = first->info.min_index;
3688          multi[0].count = first->info.max_index;
3689          multi[0].index_bias = first->index_bias;
3690          multi[1].start = next->info.min_index;
3691          multi[1].count = next->info.max_index;
3692          multi[1].index_bias = next->index_bias;
3693 
3694          /* Find how many other draws can be merged. */
3695          next = get_next_call(next, tc_draw_single);
3696          for (; is_next_call_a_mergeable_draw(first, next);
3697               next = get_next_call(next, tc_draw_single), num_draws++) {
3698             /* u_threaded_context stores start/count in min/max_index for single draws. */
3699             multi[num_draws].start = next->info.min_index;
3700             multi[num_draws].count = next->info.max_index;
3701             multi[num_draws].index_bias = next->index_bias;
3702             index_bias_varies |= first->index_bias != next->index_bias;
3703          }
3704 
3705          first->info.index_bias_varies = index_bias_varies;
3706          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3707 
3708          /* Since all draws use the same index buffer, drop all references at once. */
3709          if (first->info.index_size)
3710             pipe_drop_resource_references(first->info.index.resource, num_draws);
3711 
3712          return call_size(tc_draw_single) * num_draws;
3713       }
3714    }
3715 
3716    /* u_threaded_context stores start/count in min/max_index for single draws. */
3717    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3718    struct pipe_draw_start_count_bias draw;
3719 
3720    draw.start = first->info.min_index;
3721    draw.count = first->info.max_index;
3722    draw.index_bias = first->index_bias;
3723 
3724    first->info.index_bounds_valid = false;
3725    first->info.has_user_indices = false;
3726    first->info.take_index_buffer_ownership = false;
3727 
3728    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3729    if (first->info.index_size)
3730       tc_drop_resource_reference(first->info.index.resource);
3731 
3732    return call_size(tc_draw_single);
3733 }
3734 
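/* Illustration of the draw merging above (hypothetical sequence): three
 * consecutive indexed draws recorded with identical state and the same
 * index buffer end up as three TC_CALL_draw_single entries whose
 * pipe_draw_info differs only in min/max_index (start/count) and possibly
 * index_bias, so the loop above replays them as one
 *
 *    pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, 3);
 *
 * call instead of three separate ones.
 */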
3735 struct tc_draw_indirect {
3736    struct tc_call_base base;
3737    struct pipe_draw_start_count_bias draw;
3738    struct pipe_draw_info info;
3739    struct pipe_draw_indirect_info indirect;
3740 };
3741 
3742 static uint16_t ALWAYS_INLINE
3743 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3744 {
3745    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3746 
3747    info->info.index_bounds_valid = false;
3748    info->info.take_index_buffer_ownership = false;
3749 
3750    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3751    if (info->info.index_size)
3752       tc_drop_resource_reference(info->info.index.resource);
3753 
3754    tc_drop_resource_reference(info->indirect.buffer);
3755    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3756    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3757    return call_size(tc_draw_indirect);
3758 }
3759 
3760 struct tc_draw_multi {
3761    struct tc_call_base base;
3762    unsigned num_draws;
3763    struct pipe_draw_info info;
3764    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3765 };
3766 
3767 static uint16_t ALWAYS_INLINE
3768 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3769 {
3770    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3771 
3772    info->info.has_user_indices = false;
3773    info->info.index_bounds_valid = false;
3774    info->info.take_index_buffer_ownership = false;
3775 
3776    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3777    if (info->info.index_size)
3778       tc_drop_resource_reference(info->info.index.resource);
3779 
3780    return info->base.num_slots;
3781 }
3782 
3783 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3784    offsetof(struct pipe_draw_info, index)
3785 
3786 /* Single draw with drawid_offset == 0. */
3787 static void
3788 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3789                unsigned drawid_offset,
3790                const struct pipe_draw_indirect_info *indirect,
3791                const struct pipe_draw_start_count_bias *draws,
3792                unsigned num_draws)
3793 {
3794    struct threaded_context *tc = threaded_context(_pipe);
3795    struct tc_draw_single *p =
3796       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3797 
3798    if (info->index_size) {
3799       if (!info->take_index_buffer_ownership) {
3800          tc_set_resource_reference(&p->info.index.resource,
3801                                    info->index.resource);
3802       }
3803       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3804    }
3805    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3806    /* u_threaded_context stores start/count in min/max_index for single draws. */
3807    p->info.min_index = draws[0].start;
3808    p->info.max_index = draws[0].count;
3809    p->index_bias = draws[0].index_bias;
3810    simplify_draw_info(&p->info);
3811 }
3812 
3813 /* Single draw with drawid_offset > 0. */
3814 static void
3815 tc_draw_single_draw_id(struct pipe_context *_pipe,
3816                        const struct pipe_draw_info *info,
3817                        unsigned drawid_offset,
3818                        const struct pipe_draw_indirect_info *indirect,
3819                        const struct pipe_draw_start_count_bias *draws,
3820                        unsigned num_draws)
3821 {
3822    struct threaded_context *tc = threaded_context(_pipe);
3823    struct tc_draw_single *p =
3824       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3825 
3826    if (info->index_size) {
3827       if (!info->take_index_buffer_ownership) {
3828          tc_set_resource_reference(&p->info.index.resource,
3829                                    info->index.resource);
3830       }
3831       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3832    }
3833    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3834    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3835    /* u_threaded_context stores start/count in min/max_index for single draws. */
3836    p->info.min_index = draws[0].start;
3837    p->info.max_index = draws[0].count;
3838    p->index_bias = draws[0].index_bias;
3839    simplify_draw_info(&p->info);
3840 }
3841 
3842 /* Single draw with user indices and drawid_offset == 0. */
3843 static void
3844 tc_draw_user_indices_single(struct pipe_context *_pipe,
3845                             const struct pipe_draw_info *info,
3846                             unsigned drawid_offset,
3847                             const struct pipe_draw_indirect_info *indirect,
3848                             const struct pipe_draw_start_count_bias *draws,
3849                             unsigned num_draws)
3850 {
3851    struct threaded_context *tc = threaded_context(_pipe);
3852    unsigned index_size = info->index_size;
3853    unsigned size = draws[0].count * index_size;
3854    struct pipe_resource *buffer = NULL;
3855    unsigned offset;
3856 
3857    if (!size)
3858       return;
3859 
3860    /* This must be done before adding draw_vbo, because the upload could
3861     * generate e.g. transfer_unmap and flush a partially-uninitialized
3862     * draw_vbo call to the driver if it were done afterwards.
3863     */
3864    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3865                  (uint8_t*)info->index.user + draws[0].start * index_size,
3866                  &offset, &buffer);
3867    if (unlikely(!buffer))
3868       return;
3869 
3870    struct tc_draw_single *p =
3871       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3872    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3873    p->info.index.resource = buffer;
3874    /* u_threaded_context stores start/count in min/max_index for single draws. */
3875    p->info.min_index = offset >> util_logbase2(index_size);
3876    p->info.max_index = draws[0].count;
3877    p->index_bias = draws[0].index_bias;
3878    simplify_draw_info(&p->info);
3879 }
3880 
3881 /* Single draw with user indices and drawid_offset > 0. */
3882 static void
3883 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3884                                     const struct pipe_draw_info *info,
3885                                     unsigned drawid_offset,
3886                                     const struct pipe_draw_indirect_info *indirect,
3887                                     const struct pipe_draw_start_count_bias *draws,
3888                                     unsigned num_draws)
3889 {
3890    struct threaded_context *tc = threaded_context(_pipe);
3891    unsigned index_size = info->index_size;
3892    unsigned size = draws[0].count * index_size;
3893    struct pipe_resource *buffer = NULL;
3894    unsigned offset;
3895 
3896    if (!size)
3897       return;
3898 
3899    /* This must be done before adding draw_vbo, because the upload could
3900     * generate e.g. transfer_unmap and flush a partially-uninitialized
3901     * draw_vbo call to the driver if it were done afterwards.
3902     */
3903    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3904                  (uint8_t*)info->index.user + draws[0].start * index_size,
3905                  &offset, &buffer);
3906    if (unlikely(!buffer))
3907       return;
3908 
3909    struct tc_draw_single *p =
3910       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3911    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3912    p->info.index.resource = buffer;
3913    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3914    /* u_threaded_context stores start/count in min/max_index for single draws. */
3915    p->info.min_index = offset >> util_logbase2(index_size);
3916    p->info.max_index = draws[0].count;
3917    p->index_bias = draws[0].index_bias;
3918    simplify_draw_info(&p->info);
3919 }
3920 
3921 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
3922 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
3923 
3924 #define SLOTS_FOR_ONE_DRAW \
3925    DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
3926                 sizeof(struct tc_call_base))
3927 
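/* Rough sanity check of the macros above (the byte sizes below are made up;
 * the real values depend on the ABI): if sizeof(struct tc_draw_multi) were
 * 96, sizeof(struct pipe_draw_start_count_bias) 12 and
 * sizeof(struct tc_call_base) 8, then
 *
 *    SLOTS_FOR_ONE_DRAW = DIV_ROUND_UP(96 + 12, 8) = 14
 *
 * i.e. the smallest tc_draw_multi call holding a single draw would occupy
 * 14 batch slots.  tc_draw_multi() below uses this to decide whether the
 * current batch still has room for at least one more draw.
 */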
3928 static void
3929 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3930               unsigned drawid_offset,
3931               const struct pipe_draw_indirect_info *indirect,
3932               const struct pipe_draw_start_count_bias *draws,
3933               unsigned num_draws)
3934 {
3935    struct threaded_context *tc = threaded_context(_pipe);
3936    int total_offset = 0;
3937    bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3938 
3939    while (num_draws) {
3940       struct tc_batch *next = &tc->batch_slots[tc->next];
3941 
3942       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
3943       /* If there isn't enough room for one draw, try to fill the next batch */
3944       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
3945          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
3946       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3947 
3948       /* How many draws can we fit in the current batch */
3949       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
3950                           ONE_DRAW_SLOT_BYTES);
3951 
3952       /* Non-indexed call or indexed with a real index buffer. */
3953       struct tc_draw_multi *p =
3954          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3955                                 dr);
3956       if (info->index_size) {
3957          if (!take_index_buffer_ownership) {
3958             tc_set_resource_reference(&p->info.index.resource,
3959                                       info->index.resource);
3960          }
3961          tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3962       }
3963       take_index_buffer_ownership = false;
3964       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3965       p->num_draws = dr;
3966       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3967       num_draws -= dr;
3968 
3969       total_offset += dr;
3970    }
3971 }
3972 
3973 static void
3974 tc_draw_user_indices_multi(struct pipe_context *_pipe,
3975                            const struct pipe_draw_info *info,
3976                            unsigned drawid_offset,
3977                            const struct pipe_draw_indirect_info *indirect,
3978                            const struct pipe_draw_start_count_bias *draws,
3979                            unsigned num_draws)
3980 {
3981    struct threaded_context *tc = threaded_context(_pipe);
3982    struct pipe_resource *buffer = NULL;
3983    unsigned buffer_offset, total_count = 0;
3984    unsigned index_size_shift = util_logbase2(info->index_size);
3985    uint8_t *ptr = NULL;
3986 
3987    /* Get the total count. */
3988    for (unsigned i = 0; i < num_draws; i++)
3989       total_count += draws[i].count;
3990 
3991    if (!total_count)
3992       return;
3993 
3994    /* Allocate space for all index buffers.
3995     *
3996     * This must be done before adding draw_vbo, because the upload could
3997     * generate e.g. transfer_unmap and flush a partially-uninitialized
3998     * draw_vbo call to the driver if it were done afterwards.
3999     */
4000    u_upload_alloc(tc->base.stream_uploader, 0,
4001                   total_count << index_size_shift, 4,
4002                   &buffer_offset, &buffer, (void**)&ptr);
4003    if (unlikely(!buffer))
4004       return;
4005 
4006    int total_offset = 0;
4007    unsigned offset = 0;
4008    while (num_draws) {
4009       struct tc_batch *next = &tc->batch_slots[tc->next];
4010 
4011       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4012       /* If there isn't enough room for one draw, try to fill the next batch */
4013       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4014          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4015       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4016 
4017       /* How many draws can we fit in the current batch */
4018       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4019                           ONE_DRAW_SLOT_BYTES);
4020 
4021       struct tc_draw_multi *p =
4022          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4023                                 dr);
4024       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4025 
4026       if (total_offset == 0)
4027          /* the first slot inherits the reference from u_upload_alloc() */
4028          p->info.index.resource = buffer;
4029       else
4030          /* all following slots need a new reference */
4031          tc_set_resource_reference(&p->info.index.resource, buffer);
4032 
4033       p->num_draws = dr;
4034 
4035       /* Upload index buffers. */
4036       for (unsigned i = 0; i < dr; i++) {
4037          unsigned count = draws[i + total_offset].count;
4038 
4039          if (!count) {
4040             p->slot[i].start = 0;
4041             p->slot[i].count = 0;
4042             p->slot[i].index_bias = 0;
4043             continue;
4044          }
4045 
4046          unsigned size = count << index_size_shift;
4047          memcpy(ptr + offset,
4048                 (uint8_t*)info->index.user +
4049                 (draws[i + total_offset].start << index_size_shift), size);
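              /* slot[i].start is expressed in indices, not bytes, so convert
               * the byte offset back with index_size_shift.
               */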
4050          p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4051          p->slot[i].count = count;
4052          p->slot[i].index_bias = draws[i + total_offset].index_bias;
4053          offset += size;
4054       }
4055 
4056       total_offset += dr;
4057       num_draws -= dr;
4058    }
4059 }
4060 
4061 static void
4062 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4063                  unsigned drawid_offset,
4064                  const struct pipe_draw_indirect_info *indirect,
4065                  const struct pipe_draw_start_count_bias *draws,
4066                  unsigned num_draws)
4067 {
4068    struct threaded_context *tc = threaded_context(_pipe);
4069    assert(!info->has_user_indices);
4070    assert(num_draws == 1);
4071 
4072    struct tc_draw_indirect *p =
4073       tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4074    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4075 
4076    if (info->index_size) {
4077       if (!info->take_index_buffer_ownership) {
4078          tc_set_resource_reference(&p->info.index.resource,
4079                                    info->index.resource);
4080       }
4081       tc_add_to_buffer_list(next, info->index.resource);
4082    }
4083    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4084 
4085    tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4086    tc_set_resource_reference(&p->indirect.indirect_draw_count,
4087                              indirect->indirect_draw_count);
4088    p->indirect.count_from_stream_output = NULL;
4089    pipe_so_target_reference(&p->indirect.count_from_stream_output,
4090                             indirect->count_from_stream_output);
4091 
4092    if (indirect->buffer)
4093       tc_add_to_buffer_list(next, indirect->buffer);
4094    if (indirect->indirect_draw_count)
4095       tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4096    if (indirect->count_from_stream_output)
4097       tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4098 
4099    memcpy(&p->indirect, indirect, sizeof(*indirect));
4100    p->draw.start = draws[0].start;
4101 }
4102 
4103 /* Dispatch table for tc_draw_vbo:
4104  *
4105  * Indexed by:
4106  *    [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4107  *     is_multi_draw * 2 + non_zero_draw_id]
4108  */
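     /* For example, an indexed multi-draw using user-space indices with a
      * zero drawid_offset computes 0*8 + 1*4 + 1*2 + 0 = 6 and dispatches
      * to tc_draw_user_indices_multi.
      */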
4109 static pipe_draw_func draw_funcs[16] = {
4110    tc_draw_single,
4111    tc_draw_single_draw_id,
4112    tc_draw_multi,
4113    tc_draw_multi,
4114    tc_draw_user_indices_single,
4115    tc_draw_user_indices_single_draw_id,
4116    tc_draw_user_indices_multi,
4117    tc_draw_user_indices_multi,
4118    tc_draw_indirect,
4119    tc_draw_indirect,
4120    tc_draw_indirect,
4121    tc_draw_indirect,
4122    tc_draw_indirect,
4123    tc_draw_indirect,
4124    tc_draw_indirect,
4125    tc_draw_indirect,
4126 };
4127 
4128 void
4129 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4130             unsigned drawid_offset,
4131             const struct pipe_draw_indirect_info *indirect,
4132             const struct pipe_draw_start_count_bias *draws,
4133             unsigned num_draws)
4134 {
4135    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4136                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
4137 
4138    struct threaded_context *tc = threaded_context(_pipe);
4139    if (tc->options.parse_renderpass_info)
4140       tc_parse_draw(tc);
4141 
4142    /* Use a function table to call the desired variant of draw_vbo. */
4143    unsigned index = (indirect != NULL) * 8 +
4144                     (info->index_size && info->has_user_indices) * 4 +
4145                     (num_draws > 1) * 2 + (drawid_offset != 0);
4146    draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4147 
4148    /* This must be after tc_add_*call, which can flush the batch. */
4149    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4150       tc_add_all_gfx_bindings_to_buffer_list(tc);
4151 }
4152 
4153 struct tc_draw_single *
4154 tc_add_draw_single_call(struct pipe_context *_pipe,
4155                         struct pipe_resource *index_bo)
4156 {
4157    struct threaded_context *tc = threaded_context(_pipe);
4158 
4159    if (tc->options.parse_renderpass_info)
4160       tc_parse_draw(tc);
4161 
4162    struct tc_draw_single *p =
4163       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4164 
4165    if (index_bo)
4166       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4167 
4168    /* This must be after tc_add_*call, which can flush the batch. */
4169    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4170       tc_add_all_gfx_bindings_to_buffer_list(tc);
4171 
4172    return p;
4173 }
4174 
4175 struct tc_draw_vstate_single {
4176    struct tc_call_base base;
4177    struct pipe_draw_start_count_bias draw;
4178 
4179    /* The following states must be together without holes because they are
4180     * compared by draw merging.
4181     */
4182    struct pipe_vertex_state *state;
4183    uint32_t partial_velem_mask;
4184    struct pipe_draw_vertex_state_info info;
4185 };
4186 
4187 static bool
4188 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4189                                      struct tc_draw_vstate_single *next)
4190 {
4191    if (next->base.call_id != TC_CALL_draw_vstate_single)
4192       return false;
4193 
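        /* Compare state, partial_velem_mask and info as one contiguous block,
         * spanning from offsetof(state) to the end of info.
         */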
4194    return !memcmp(&first->state, &next->state,
4195                   offsetof(struct tc_draw_vstate_single, info) +
4196                   sizeof(struct pipe_draw_vertex_state_info) -
4197                   offsetof(struct tc_draw_vstate_single, state));
4198 }
4199 
4200 static uint16_t ALWAYS_INLINE
4201 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4202 {
4203    /* Draw call merging. */
4204    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4205    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4206 
4207    /* If at least 2 consecutive draw calls can be merged... */
4208    if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4209       /* The maximum number of merged draws is given by the batch size. */
4210       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4211                                               call_size(tc_draw_vstate_single)];
4212       unsigned num_draws = 2;
4213 
4214       draws[0] = first->draw;
4215       draws[1] = next->draw;
4216 
4217       /* Find how many other draws can be merged. */
4218       next = get_next_call(next, tc_draw_vstate_single);
4219       for (; is_next_call_a_mergeable_draw_vstate(first, next);
4220            next = get_next_call(next, tc_draw_vstate_single),
4221            num_draws++)
4222          draws[num_draws] = next->draw;
4223 
4224       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4225                               first->info, draws, num_draws);
4226       /* Since all draws use the same state, drop all references at once. */
4227       tc_drop_vertex_state_references(first->state, num_draws);
4228 
4229       return call_size(tc_draw_vstate_single) * num_draws;
4230    }
4231 
4232    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4233                            first->info, &first->draw, 1);
4234    tc_drop_vertex_state_references(first->state, 1);
4235    return call_size(tc_draw_vstate_single);
4236 }
4237 
4238 struct tc_draw_vstate_multi {
4239    struct tc_call_base base;
4240    uint32_t partial_velem_mask;
4241    struct pipe_draw_vertex_state_info info;
4242    unsigned num_draws;
4243    struct pipe_vertex_state *state;
4244    struct pipe_draw_start_count_bias slot[0];
4245 };
4246 
4247 static uint16_t ALWAYS_INLINE
4248 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4249 {
4250    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4251 
4252    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4253                            info->info, info->slot, info->num_draws);
4254    tc_drop_vertex_state_references(info->state, 1);
4255    return info->base.num_slots;
4256 }
4257 
4258 static void
4259 tc_draw_vertex_state(struct pipe_context *_pipe,
4260                      struct pipe_vertex_state *state,
4261                      uint32_t partial_velem_mask,
4262                      struct pipe_draw_vertex_state_info info,
4263                      const struct pipe_draw_start_count_bias *draws,
4264                      unsigned num_draws)
4265 {
4266    struct threaded_context *tc = threaded_context(_pipe);
4267    if (tc->options.parse_renderpass_info)
4268       tc_parse_draw(tc);
4269 
4270    if (num_draws == 1) {
4271       /* Single draw. */
4272       struct tc_draw_vstate_single *p =
4273          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4274       p->partial_velem_mask = partial_velem_mask;
4275       p->draw = draws[0];
4276       p->info.mode = info.mode;
4277       p->info.take_vertex_state_ownership = false;
4278 
4279       /* This should always be 0 for simplicity because we assume that
4280        * index_bias doesn't vary.
4281        */
4282       assert(draws[0].index_bias == 0);
4283 
4284       if (!info.take_vertex_state_ownership)
4285          tc_set_vertex_state_reference(&p->state, state);
4286       else
4287          p->state = state;
4288 
4289 
4290       /* This must be after tc_add_*call, which can flush the batch. */
4291       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4292          tc_add_all_gfx_bindings_to_buffer_list(tc);
4293       return;
4294    }
4295 
4296    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4297    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4298    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4299                                                sizeof(struct tc_call_base));
4300    /* Multi draw. */
4301    int total_offset = 0;
4302    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4303    while (num_draws) {
4304       struct tc_batch *next = &tc->batch_slots[tc->next];
4305 
4306       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4307       /* If there isn't enough room for one draw, start filling the next batch. */
4308       if (nb_slots_left < slots_for_one_draw)
4309          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4310       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4311 
4312       /* How many draws can we fit in the current batch */
4313       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4314 
4315       /* Non-indexed call or indexed with a real index buffer. */
4316       struct tc_draw_vstate_multi *p =
4317          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4318 
4319       if (!take_vertex_state_ownership)
4320          tc_set_vertex_state_reference(&p->state, state);
4321       else
4322          p->state = state;
4323 
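           /* Only the first chunk may adopt the caller's vertex_state
            * reference; clear the flag so later iterations add their own.
            */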
4324       take_vertex_state_ownership = false;
4325       p->partial_velem_mask = partial_velem_mask;
4326       p->info.mode = info.mode;
4327       p->info.take_vertex_state_ownership = false;
4328       p->num_draws = dr;
4329       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4330       num_draws -= dr;
4331 
4332       total_offset += dr;
4333    }
4334 
4335 
4336    /* This must be after tc_add_*call, which can flush the batch. */
4337    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4338       tc_add_all_gfx_bindings_to_buffer_list(tc);
4339 }
4340 
4341 struct tc_launch_grid_call {
4342    struct tc_call_base base;
4343    struct pipe_grid_info info;
4344 };
4345 
4346 static uint16_t ALWAYS_INLINE
4347 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4348 {
4349    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4350 
4351    pipe->launch_grid(pipe, p);
4352    tc_drop_resource_reference(p->indirect);
4353    return call_size(tc_launch_grid_call);
4354 }
4355 
4356 static void
4357 tc_launch_grid(struct pipe_context *_pipe,
4358                const struct pipe_grid_info *info)
4359 {
4360    struct threaded_context *tc = threaded_context(_pipe);
4361    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4362                                                tc_launch_grid_call);
4363    assert(info->input == NULL);
4364 
4365    tc_set_resource_reference(&p->info.indirect, info->indirect);
4366    memcpy(&p->info, info, sizeof(*info));
4367 
4368    if (info->indirect)
4369       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4370 
4371    /* This must be after tc_add_*call, which can flush the batch. */
4372    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4373       tc_add_all_compute_bindings_to_buffer_list(tc);
4374 }
4375 
4376 static uint16_t ALWAYS_INLINE
4377 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4378 {
4379    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4380 
4381    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4382                               p->dstz, p->src, p->src_level, &p->src_box);
4383    tc_drop_resource_reference(p->dst);
4384    tc_drop_resource_reference(p->src);
4385    return call_size(tc_resource_copy_region);
4386 }
4387 
4388 static void
4389 tc_resource_copy_region(struct pipe_context *_pipe,
4390                         struct pipe_resource *dst, unsigned dst_level,
4391                         unsigned dstx, unsigned dsty, unsigned dstz,
4392                         struct pipe_resource *src, unsigned src_level,
4393                         const struct pipe_box *src_box)
4394 {
4395    struct threaded_context *tc = threaded_context(_pipe);
4396    struct threaded_resource *tdst = threaded_resource(dst);
4397    struct tc_resource_copy_region *p =
4398       tc_add_call(tc, TC_CALL_resource_copy_region,
4399                   tc_resource_copy_region);
4400 
4401    if (dst->target == PIPE_BUFFER)
4402       tc_buffer_disable_cpu_storage(dst);
4403 
4404    tc_set_resource_batch_usage(tc, dst);
4405    tc_set_resource_reference(&p->dst, dst);
4406    p->dst_level = dst_level;
4407    p->dstx = dstx;
4408    p->dsty = dsty;
4409    p->dstz = dstz;
4410    tc_set_resource_batch_usage(tc, src);
4411    tc_set_resource_reference(&p->src, src);
4412    p->src_level = src_level;
4413    p->src_box = *src_box;
4414 
4415    if (dst->target == PIPE_BUFFER) {
4416       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4417 
4418       tc_add_to_buffer_list(next, src);
4419       tc_add_to_buffer_list(next, dst);
4420 
4421       util_range_add(&tdst->b, &tdst->valid_buffer_range,
4422                      dstx, dstx + src_box->width);
4423    }
4424 }
4425 
4426 struct tc_blit_call {
4427    struct tc_call_base base;
4428    struct pipe_blit_info info;
4429 };
4430 
4431 static uint16_t ALWAYS_INLINE
4432 tc_call_blit(struct pipe_context *pipe, void *call)
4433 {
4434    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4435 
4436    pipe->blit(pipe, blit);
4437    tc_drop_resource_reference(blit->dst.resource);
4438    tc_drop_resource_reference(blit->src.resource);
4439    return call_size(tc_blit_call);
4440 }
4441 
4442 static void
4443 tc_blit_enqueue(struct threaded_context *tc, const struct pipe_blit_info *info)
4444 {
4445    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4446 
4447    tc_set_resource_batch_usage(tc, info->dst.resource);
4448    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4449    tc_set_resource_batch_usage(tc, info->src.resource);
4450    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4451    memcpy(&blit->info, info, sizeof(*info));
4452 }
4453 
4454 static void
4455 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4456 {
4457    struct threaded_context *tc = threaded_context(_pipe);
4458 
4459    /* filter out untracked non-resolves */
4460    if (!tc->options.parse_renderpass_info ||
4461        info->src.resource->nr_samples <= 1 ||
4462        info->dst.resource->nr_samples > 1) {
4463       tc_blit_enqueue(tc, info);
4464       return;
4465    }
4466 
4467    if (tc->fb_resolve == info->dst.resource) {
4468       /* optimize out this blit entirely */
4469       tc->renderpass_info_recording->has_resolve = true;
4470       return;
4471    }
4472    for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4473       if (tc->fb_resources[i] == info->src.resource) {
4474          tc->renderpass_info_recording->has_resolve = true;
4475          break;
4476       }
4477    }
4478    tc_blit_enqueue(tc, info);
4479 }
4480 
4481 struct tc_generate_mipmap {
4482    struct tc_call_base base;
4483    enum pipe_format format;
4484    unsigned base_level;
4485    unsigned last_level;
4486    unsigned first_layer;
4487    unsigned last_layer;
4488    struct pipe_resource *res;
4489 };
4490 
4491 static uint16_t ALWAYS_INLINE
4492 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4493 {
4494    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4495    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4496                                                     p->base_level,
4497                                                     p->last_level,
4498                                                     p->first_layer,
4499                                                     p->last_layer);
4500    assert(result);
4501    tc_drop_resource_reference(p->res);
4502    return call_size(tc_generate_mipmap);
4503 }
4504 
4505 static bool
4506 tc_generate_mipmap(struct pipe_context *_pipe,
4507                    struct pipe_resource *res,
4508                    enum pipe_format format,
4509                    unsigned base_level,
4510                    unsigned last_level,
4511                    unsigned first_layer,
4512                    unsigned last_layer)
4513 {
4514    struct threaded_context *tc = threaded_context(_pipe);
4515    struct pipe_context *pipe = tc->pipe;
4516    struct pipe_screen *screen = pipe->screen;
4517    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
4518 
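        /* Mipmap generation samples one level and renders into the next, so
         * the format must be usable both as a sampler view and as a render
         * target (or depth/stencil) attachment.
         */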
4519    if (util_format_is_depth_or_stencil(format))
4520       bind |= PIPE_BIND_DEPTH_STENCIL;
4521    else
4522       bind |= PIPE_BIND_RENDER_TARGET;
4523 
4524    if (!screen->is_format_supported(screen, format, res->target,
4525                                     res->nr_samples, res->nr_storage_samples,
4526                                     bind))
4527       return false;
4528 
4529    struct tc_generate_mipmap *p =
4530       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
4531 
4532    tc_set_resource_batch_usage(tc, res);
4533    tc_set_resource_reference(&p->res, res);
4534    p->format = format;
4535    p->base_level = base_level;
4536    p->last_level = last_level;
4537    p->first_layer = first_layer;
4538    p->last_layer = last_layer;
4539    return true;
4540 }
4541 
4542 struct tc_resource_call {
4543    struct tc_call_base base;
4544    struct pipe_resource *resource;
4545 };
4546 
4547 static uint16_t ALWAYS_INLINE
4548 tc_call_flush_resource(struct pipe_context *pipe, void *call)
4549 {
4550    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4551 
4552    pipe->flush_resource(pipe, resource);
4553    tc_drop_resource_reference(resource);
4554    return call_size(tc_resource_call);
4555 }
4556 
4557 static void
4558 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
4559 {
4560    struct threaded_context *tc = threaded_context(_pipe);
4561    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
4562                                                tc_resource_call);
4563 
4564    tc_set_resource_batch_usage(tc, resource);
4565    tc_set_resource_reference(&call->resource, resource);
4566 }
4567 
4568 static uint16_t ALWAYS_INLINE
4569 tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
4570 {
4571    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4572 
4573    pipe->invalidate_resource(pipe, resource);
4574    tc_drop_resource_reference(resource);
4575    return call_size(tc_resource_call);
4576 }
4577 
4578 static void
4579 tc_invalidate_resource(struct pipe_context *_pipe,
4580                        struct pipe_resource *resource)
4581 {
4582    struct threaded_context *tc = threaded_context(_pipe);
4583 
4584    if (resource->target == PIPE_BUFFER) {
4585       tc_invalidate_buffer(tc, threaded_resource(resource));
4586       return;
4587    }
4588 
4589    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
4590                                                tc_resource_call);
4591    tc_set_resource_batch_usage(tc, resource);
4592    tc_set_resource_reference(&call->resource, resource);
4593 
4594    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4595    if (info) {
4596       if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
4597          info->zsbuf_invalidate = true;
4598       } else {
4599          for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4600             if (tc->fb_resources[i] == resource)
4601                info->cbuf_invalidate |= BITFIELD_BIT(i);
4602          }
4603       }
4604    }
4605 }
4606 
4607 struct tc_clear {
4608    struct tc_call_base base;
4609    bool scissor_state_set;
4610    uint8_t stencil;
4611    uint16_t buffers;
4612    float depth;
4613    struct pipe_scissor_state scissor_state;
4614    union pipe_color_union color;
4615 };
4616 
4617 static uint16_t ALWAYS_INLINE
4618 tc_call_clear(struct pipe_context *pipe, void *call)
4619 {
4620    struct tc_clear *p = to_call(call, tc_clear);
4621 
4622    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
4623    return call_size(tc_clear);
4624 }
4625 
4626 static void
4627 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
4628          const union pipe_color_union *color, double depth,
4629          unsigned stencil)
4630 {
4631    struct threaded_context *tc = threaded_context(_pipe);
4632    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
4633 
4634    p->buffers = buffers;
4635    if (scissor_state) {
4636       p->scissor_state = *scissor_state;
4637       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4638       /* partial clear info is useful for drivers to know whether any zs writes occur;
4639        * drivers are responsible for optimizing partial clear -> full clear
4640        */
4641       if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
4642          info->zsbuf_clear_partial |= !info->zsbuf_clear;
4643    } else {
4644       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4645       if (info) {
4646          /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
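              /* buffers >> 2 maps the PIPE_CLEAR_COLOR0.. bits onto the
               * per-colorbuffer mask used by cbuf_load/cbuf_clear.
               */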
4647          info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
4648          if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
4649             if (!info->zsbuf_load && !info->zsbuf_clear_partial)
4650                info->zsbuf_clear = true;
4651             else if (!info->zsbuf_clear)
4652                /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
4653                info->zsbuf_clear_partial = true;
4654          }
4655       }
4656    }
4657    p->scissor_state_set = !!scissor_state;
4658    p->color = *color;
4659    p->depth = depth;
4660    p->stencil = stencil;
4661 }
4662 
4663 struct tc_clear_render_target {
4664    struct tc_call_base base;
4665    bool render_condition_enabled;
4666    unsigned dstx;
4667    unsigned dsty;
4668    unsigned width;
4669    unsigned height;
4670    union pipe_color_union color;
4671    struct pipe_surface *dst;
4672 };
4673 
4674 static uint16_t ALWAYS_INLINE
4675 tc_call_clear_render_target(struct pipe_context *pipe, void *call)
4676 {
4677    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
4678 
4679    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
4680                              p->render_condition_enabled);
4681    tc_drop_surface_reference(p->dst);
4682    return call_size(tc_clear_render_target);
4683 }
4684 
4685 static void
4686 tc_clear_render_target(struct pipe_context *_pipe,
4687                        struct pipe_surface *dst,
4688                        const union pipe_color_union *color,
4689                        unsigned dstx, unsigned dsty,
4690                        unsigned width, unsigned height,
4691                        bool render_condition_enabled)
4692 {
4693    struct threaded_context *tc = threaded_context(_pipe);
4694    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
4695    p->dst = NULL;
4696    pipe_surface_reference(&p->dst, dst);
4697    p->color = *color;
4698    p->dstx = dstx;
4699    p->dsty = dsty;
4700    p->width = width;
4701    p->height = height;
4702    p->render_condition_enabled = render_condition_enabled;
4703 }
4704 
4705 
4706 struct tc_clear_depth_stencil {
4707    struct tc_call_base base;
4708    bool render_condition_enabled;
4709    float depth;
4710    unsigned clear_flags;
4711    unsigned stencil;
4712    unsigned dstx;
4713    unsigned dsty;
4714    unsigned width;
4715    unsigned height;
4716    struct pipe_surface *dst;
4717 };
4718 
4719 
4720 static uint16_t ALWAYS_INLINE
4721 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
4722 {
4723    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4724 
4725    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4726                              p->dstx, p->dsty, p->width, p->height,
4727                              p->render_condition_enabled);
4728    tc_drop_surface_reference(p->dst);
4729    return call_size(tc_clear_depth_stencil);
4730 }
4731 
4732 static void
4733 tc_clear_depth_stencil(struct pipe_context *_pipe,
4734                        struct pipe_surface *dst, unsigned clear_flags,
4735                        double depth, unsigned stencil, unsigned dstx,
4736                        unsigned dsty, unsigned width, unsigned height,
4737                        bool render_condition_enabled)
4738 {
4739    struct threaded_context *tc = threaded_context(_pipe);
4740    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4741    p->dst = NULL;
4742    pipe_surface_reference(&p->dst, dst);
4743    p->clear_flags = clear_flags;
4744    p->depth = depth;
4745    p->stencil = stencil;
4746    p->dstx = dstx;
4747    p->dsty = dsty;
4748    p->width = width;
4749    p->height = height;
4750    p->render_condition_enabled = render_condition_enabled;
4751 }
4752 
4753 struct tc_clear_buffer {
4754    struct tc_call_base base;
4755    uint8_t clear_value_size;
4756    unsigned offset;
4757    unsigned size;
4758    char clear_value[16];
4759    struct pipe_resource *res;
4760 };
4761 
4762 static uint16_t ALWAYS_INLINE
4763 tc_call_clear_buffer(struct pipe_context *pipe, void *call)
4764 {
4765    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4766 
4767    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4768                       p->clear_value_size);
4769    tc_drop_resource_reference(p->res);
4770    return call_size(tc_clear_buffer);
4771 }
4772 
4773 static void
4774 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4775                 unsigned offset, unsigned size,
4776                 const void *clear_value, int clear_value_size)
4777 {
4778    struct threaded_context *tc = threaded_context(_pipe);
4779    struct threaded_resource *tres = threaded_resource(res);
4780    struct tc_clear_buffer *p =
4781       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4782 
4783    tc_buffer_disable_cpu_storage(res);
4784 
4785    tc_set_resource_reference(&p->res, res);
4786    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
4787    p->offset = offset;
4788    p->size = size;
4789    memcpy(p->clear_value, clear_value, clear_value_size);
4790    p->clear_value_size = clear_value_size;
4791 
4792    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4793 }
4794 
4795 struct tc_clear_texture {
4796    struct tc_call_base base;
4797    unsigned level;
4798    struct pipe_box box;
4799    char data[16];
4800    struct pipe_resource *res;
4801 };
4802 
4803 static uint16_t ALWAYS_INLINE
4804 tc_call_clear_texture(struct pipe_context *pipe, void *call)
4805 {
4806    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4807 
4808    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4809    tc_drop_resource_reference(p->res);
4810    return call_size(tc_clear_texture);
4811 }
4812 
4813 static void
4814 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4815                  unsigned level, const struct pipe_box *box, const void *data)
4816 {
4817    struct threaded_context *tc = threaded_context(_pipe);
4818    struct tc_clear_texture *p =
4819       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4820 
4821    tc_set_resource_batch_usage(tc, res);
4822    tc_set_resource_reference(&p->res, res);
4823    p->level = level;
4824    p->box = *box;
4825    memcpy(p->data, data,
4826           util_format_get_blocksize(res->format));
4827 }
4828 
4829 struct tc_resource_commit {
4830    struct tc_call_base base;
4831    bool commit;
4832    unsigned level;
4833    struct pipe_box box;
4834    struct pipe_resource *res;
4835 };
4836 
4837 static uint16_t ALWAYS_INLINE
4838 tc_call_resource_commit(struct pipe_context *pipe, void *call)
4839 {
4840    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4841 
4842    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4843    tc_drop_resource_reference(p->res);
4844    return call_size(tc_resource_commit);
4845 }
4846 
4847 static bool
4848 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4849                    unsigned level, struct pipe_box *box, bool commit)
4850 {
4851    struct threaded_context *tc = threaded_context(_pipe);
4852    struct tc_resource_commit *p =
4853       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4854 
4855    tc_set_resource_reference(&p->res, res);
4856    tc_set_resource_batch_usage(tc, res);
4857    p->level = level;
4858    p->box = *box;
4859    p->commit = commit;
4860    return true; /* we don't care about the return value for this call */
4861 }
4862 
4863 static unsigned
4864 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4865 {
4866    struct threaded_context *tc = threaded_context(_pipe);
4867    struct pipe_context *pipe = tc->pipe;
4868 
4869    return pipe->init_intel_perf_query_info(pipe);
4870 }
4871 
4872 static void
4873 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4874                              unsigned query_index,
4875                              const char **name,
4876                              uint32_t *data_size,
4877                              uint32_t *n_counters,
4878                              uint32_t *n_active)
4879 {
4880    struct threaded_context *tc = threaded_context(_pipe);
4881    struct pipe_context *pipe = tc->pipe;
4882 
4883    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4884    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4885          n_counters, n_active);
4886 }
4887 
4888 static void
4889 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4890                                      unsigned query_index,
4891                                      unsigned counter_index,
4892                                      const char **name,
4893                                      const char **desc,
4894                                      uint32_t *offset,
4895                                      uint32_t *data_size,
4896                                      uint32_t *type_enum,
4897                                      uint32_t *data_type_enum,
4898                                      uint64_t *raw_max)
4899 {
4900    struct threaded_context *tc = threaded_context(_pipe);
4901    struct pipe_context *pipe = tc->pipe;
4902 
4903    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4904          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4905 }
4906 
4907 static struct pipe_query *
4908 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4909 {
4910    struct threaded_context *tc = threaded_context(_pipe);
4911    struct pipe_context *pipe = tc->pipe;
4912 
4913    return pipe->new_intel_perf_query_obj(pipe, query_index);
4914 }
4915 
4916 static uint16_t ALWAYS_INLINE
4917 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
4918 {
4919    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4920    return call_size(tc_query_call);
4921 }
4922 
4923 static bool
4924 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4925 {
4926    struct threaded_context *tc = threaded_context(_pipe);
4927 
4928    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4929 
4930    /* Assume success; a failed begin can be reported later via get_intel_perf_query_data. */
4931    return true;
4932 }
4933 
4934 static uint16_t ALWAYS_INLINE
4935 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
4936 {
4937    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4938    return call_size(tc_query_call);
4939 }
4940 
4941 static void
4942 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4943 {
4944    struct threaded_context *tc = threaded_context(_pipe);
4945 
4946    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4947 }
4948 
4949 static void
4950 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4951 {
4952    struct threaded_context *tc = threaded_context(_pipe);
4953    struct pipe_context *pipe = tc->pipe;
4954 
4955    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4956    pipe->delete_intel_perf_query(pipe, q);
4957 }
4958 
4959 static void
4960 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4961 {
4962    struct threaded_context *tc = threaded_context(_pipe);
4963    struct pipe_context *pipe = tc->pipe;
4964 
4965    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4966    pipe->wait_intel_perf_query(pipe, q);
4967 }
4968 
4969 static bool
4970 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4971 {
4972    struct threaded_context *tc = threaded_context(_pipe);
4973    struct pipe_context *pipe = tc->pipe;
4974 
4975    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4976    return pipe->is_intel_perf_query_ready(pipe, q);
4977 }
4978 
4979 static bool
4980 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4981                              struct pipe_query *q,
4982                              size_t data_size,
4983                              uint32_t *data,
4984                              uint32_t *bytes_written)
4985 {
4986    struct threaded_context *tc = threaded_context(_pipe);
4987    struct pipe_context *pipe = tc->pipe;
4988 
4989    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4990    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4991 }
4992 
4993 /********************************************************************
4994  * callback
4995  */
4996 
4997 struct tc_callback_call {
4998    struct tc_call_base base;
4999    void (*fn)(void *data);
5000    void *data;
5001 };
5002 
5003 static uint16_t ALWAYS_INLINE
5004 tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
5005 {
5006    struct tc_callback_call *p = to_call(call, tc_callback_call);
5007 
5008    p->fn(p->data);
5009    return call_size(tc_callback_call);
5010 }
5011 
5012 static void
5013 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
5014             bool asap)
5015 {
5016    struct threaded_context *tc = threaded_context(_pipe);
5017 
5018    if (asap && tc_is_sync(tc)) {
5019       fn(data);
5020       return;
5021    }
5022 
5023    struct tc_callback_call *p =
5024       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
5025    p->fn = fn;
5026    p->data = data;
5027 }
5028 
5029 /********************************************************************
5030  * batch execution in the driver thread
5031  */
5032 
5033 typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call);
5034 
5035 ALWAYS_INLINE static void
5036 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, bool parsing)
5037 {
5038    /* if the framebuffer state is persisting from a previous batch,
5039     * begin incrementing renderpass info on the first set_framebuffer_state call
5040     */
5041    bool first = !batch->first_set_fb;
5042    uint64_t *iter = batch->slots;
5043 
5044    while (1) {
5045       struct tc_call_base *call = (struct tc_call_base *)iter;
5046 
5047       tc_assert(call->sentinel == TC_SENTINEL);
5048 #if TC_DEBUG >= 3
5049       tc_printf("CALL: %s", tc_call_names[call->call_id]);
5050 #endif
5051       TC_TRACE_SCOPE(call->call_id);
5052 
5053       /* This executes the call using a switch. */
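           /* Each CALL(name) listed in u_threaded_context_calls.h expands to
            * a "case TC_CALL_name:" that runs tc_call_name() and advances the
            * iterator by the number of 64-bit slots that call occupied.
            */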
5054       switch (call->call_id) {
5055 #define CALL(name) \
5056       case TC_CALL_##name: \
5057          iter += tc_call_##name(pipe, call); \
5058          break;
5059 #include "u_threaded_context_calls.h"
5060 #undef CALL
5061       case TC_END_BATCH:
5062          return;
5063       }
5064 
5065       if (parsing) {
5066          if (call->call_id == TC_CALL_flush) {
5067             /* always increment renderpass info for non-deferred flushes */
5068             batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
5069             /* if a flush happens, renderpass info is always incremented after */
5070             first = false;
5071          } else if (call->call_id == TC_CALL_set_framebuffer_state) {
5072             /* the renderpass info pointer is already set at the start of the batch,
5073              * so don't increment on the first set_framebuffer_state call
5074              */
5075             if (!first)
5076                batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
5077             first = false;
5078          } else if (call->call_id == TC_CALL_draw_single ||
5079                     call->call_id == TC_CALL_draw_multi ||
5080                     (call->call_id >= TC_CALL_draw_single_drawid &&
5081                      call->call_id <= TC_CALL_draw_vstate_multi)) {
5082             /* if a draw happens before a set_framebuffer_state on this batch,
5083              * begin incrementing renderpass data
5084              */
5085             first = false;
5086          }
5087       }
5088    }
5089 }
5090 
5091 static void
5092 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
5093 {
5094    struct tc_batch *batch = job;
5095    struct pipe_context *pipe = batch->tc->pipe;
5096 
5097    tc_batch_check(batch);
5098    tc_set_driver_thread(batch->tc);
5099 
5100    assert(!batch->token);
5101 
5102    /* setup renderpass info */
5103    batch->tc->renderpass_info = batch->renderpass_infos.data;
5104 
5105    if (batch->tc->options.parse_renderpass_info) {
5106       batch_execute(batch, pipe, true);
5107 
5108       struct tc_batch_rp_info *info = batch->renderpass_infos.data;
5109       for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
5110          if (info[i].next)
5111             info[i].next->prev = NULL;
5112          info[i].next = NULL;
5113       }
5114    } else {
5115       batch_execute(batch, pipe, false);
5116    }
5117 
5118    /* Add the fence to the list of fences for the driver to signal at the next
5119     * flush, which we use for tracking which buffers are referenced by
5120     * an unflushed command buffer.
5121     */
5122    struct threaded_context *tc = batch->tc;
5123    struct util_queue_fence *fence =
5124       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
5125 
5126    if (tc->options.driver_calls_flush_notify) {
5127       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
5128 
5129       /* Since our buffer lists are chained as a ring, we need to flush
5130        * the context twice as we go around the ring to make the driver signal
5131        * the buffer list fences, so that the producer thread can reuse the buffer
5132        * list structures for the next batches without waiting.
5133        */
5134       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
5135       if (batch->buffer_list_index % half_ring == half_ring - 1)
5136          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
5137    } else {
5138       util_queue_fence_signal(fence);
5139    }
5140 
5141    tc_clear_driver_thread(batch->tc);
5142    tc_batch_check(batch);
5143    batch->num_total_slots = 0;
5144    batch->last_mergeable_call = NULL;
5145    batch->first_set_fb = false;
5146    batch->max_renderpass_info_idx = 0;
5147    batch->tc->last_completed = batch->batch_idx;
5148 }
5149 
5150 /********************************************************************
5151  * create & destroy
5152  */
5153 
5154 static void
5155 tc_destroy(struct pipe_context *_pipe)
5156 {
5157    struct threaded_context *tc = threaded_context(_pipe);
5158    struct pipe_context *pipe = tc->pipe;
5159 
5160    if (tc->base.const_uploader &&
5161        tc->base.stream_uploader != tc->base.const_uploader)
5162       u_upload_destroy(tc->base.const_uploader);
5163 
5164    if (tc->base.stream_uploader)
5165       u_upload_destroy(tc->base.stream_uploader);
5166 
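        /* Wait for the driver thread to finish all queued batches before the
         * queue and the batch fences are destroyed below.
         */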
5167    tc_sync(tc);
5168 
5169    if (util_queue_is_initialized(&tc->queue)) {
5170       util_queue_destroy(&tc->queue);
5171 
5172       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5173          util_queue_fence_destroy(&tc->batch_slots[i].fence);
5174          util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
5175          assert(!tc->batch_slots[i].token);
5176       }
5177    }
5178 
5179    slab_destroy_child(&tc->pool_transfers);
5180    assert(tc->batch_slots[tc->next].num_total_slots == 0);
5181    pipe->destroy(pipe);
5182 
5183    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
5184       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
5185          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
5186       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
5187    }
5188 
5189    for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
5190       pipe_resource_reference(&tc->fb_resources[i], NULL);
5191    pipe_resource_reference(&tc->fb_resolve, NULL);
5192 
5193    FREE(tc);
5194 }
5195 
5196 void tc_driver_internal_flush_notify(struct threaded_context *tc)
5197 {
5198    /* Allow drivers to call this function even for internal contexts that
5199     * don't have tc. It simplifies drivers.
5200     */
5201    if (!tc)
5202       return;
5203 
5204    /* Signal fences set by tc_batch_execute. */
5205    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
5206       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
5207 
5208    tc->num_signal_fences_next_flush = 0;
5209 }
5210 
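     /* A minimal usage sketch of threaded_context_create(); the driver-side
      * names (my_create_context, my_screen->transfer_pool,
      * my_replace_buffer_storage) are hypothetical placeholders:
      *
      *    struct pipe_context *ctx = my_create_context(screen);
      *    struct threaded_context *tc;
      *
      *    ctx = threaded_context_create(ctx, &my_screen->transfer_pool,
      *                                  my_replace_buffer_storage,
      *                                  NULL, &tc);
      *
      * "ctx" is then either the threaded wrapper or, if threading is
      * disabled, the original context, and is typically what the driver
      * returns from its create_context hook.
      */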
5211 /**
5212  * Wrap an existing pipe_context into a threaded_context.
5213  *
5214  * \param pipe                 pipe_context to wrap
5215  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
5216  *                             transfer objects; the driver should have one
5217  *                             in pipe_screen.
5218  * \param replace_buffer  callback for replacing a pipe_resource's storage
5219  *                        with another pipe_resource's storage.
5220  * \param options         optional TC options/callbacks
5221  * \param out  if successful, the threaded_context will be returned here in
5222  *             addition to the return value if "out" != NULL
5223  */
5224 struct pipe_context *
5225 threaded_context_create(struct pipe_context *pipe,
5226                         struct slab_parent_pool *parent_transfer_pool,
5227                         tc_replace_buffer_storage_func replace_buffer,
5228                         const struct threaded_context_options *options,
5229                         struct threaded_context **out)
5230 {
5231    struct threaded_context *tc;
5232 
5233    if (!pipe)
5234       return NULL;
5235 
5236    if (!debug_get_bool_option("GALLIUM_THREAD", true))
5237       return pipe;
5238 
5239    tc = CALLOC_STRUCT(threaded_context);
5240    if (!tc) {
5241       pipe->destroy(pipe);
5242       return NULL;
5243    }
5244 
5245    if (options) {
5246       /* this is unimplementable */
5247       assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
5248       tc->options = *options;
5249    }
5250 
5251    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
5252 
5253    /* The driver context isn't wrapped, so set its "priv" to NULL. */
5254    pipe->priv = NULL;
5255 
5256    tc->pipe = pipe;
5257    tc->replace_buffer_storage = replace_buffer;
5258    tc->map_buffer_alignment =
5259       pipe->screen->caps.min_map_buffer_alignment;
5260    tc->ubo_alignment =
5261       MAX2(pipe->screen->caps.constant_buffer_offset_alignment, 64);
5262    tc->base.priv = pipe; /* priv points to the wrapped driver context */
5263    tc->base.screen = pipe->screen;
5264    tc->base.destroy = tc_destroy;
5265    tc->base.callback = tc_callback;
5266 
5267    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
5268    if (pipe->stream_uploader == pipe->const_uploader)
5269       tc->base.const_uploader = tc->base.stream_uploader;
5270    else
5271       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
5272 
5273    if (!tc->base.stream_uploader || !tc->base.const_uploader)
5274       goto fail;
5275 
5276    tc->use_forced_staging_uploads = true;
5277 
5278    /* The queue size is the number of batches "waiting". Batches are removed
5279     * from the queue before being executed, so keep one tc_batch slot for that
5280     * execution. Also, keep one unused slot for an unflushed batch.
5281     */
5282    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
5283       goto fail;
5284 
5285    tc->last_completed = -1;
5286    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5287 #if !defined(NDEBUG) && TC_DEBUG >= 1
5288       tc->batch_slots[i].sentinel = TC_SENTINEL;
5289 #endif
5290       tc->batch_slots[i].tc = tc;
5291       tc->batch_slots[i].batch_idx = i;
5292       util_queue_fence_init(&tc->batch_slots[i].fence);
5293       tc->batch_slots[i].renderpass_info_idx = -1;
5294       if (tc->options.parse_renderpass_info) {
5295          util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
5296          tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
5297       }
5298    }
5299    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
5300       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
5301 
5302    list_inithead(&tc->unflushed_queries);
5303 
5304    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
5305 
5306    /* If you have different limits in each shader stage, set the maximum. */
5307    struct pipe_screen *screen = pipe->screen;
5308    tc->max_const_buffers =
5309       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5310                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
5311    tc->max_shader_buffers =
5312       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5313                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
5314    tc->max_images =
5315       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5316                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
5317    tc->max_samplers =
5318       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5319                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
5320 
5321    tc->base.set_context_param = tc_set_context_param; /* always set this */
5322 
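        /* Install a tc_* wrapper only for the hooks the driver implements;
         * unimplemented hooks stay NULL in the threaded context too.
         */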
5323 #define CTX_INIT(_member) \
5324    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
5325 
5326    CTX_INIT(flush);
5327    CTX_INIT(draw_vbo);
5328    CTX_INIT(draw_vertex_state);
5329    CTX_INIT(launch_grid);
5330    CTX_INIT(resource_copy_region);
5331    CTX_INIT(blit);
5332    CTX_INIT(clear);
5333    CTX_INIT(clear_render_target);
5334    CTX_INIT(clear_depth_stencil);
5335    CTX_INIT(clear_buffer);
5336    CTX_INIT(clear_texture);
5337    CTX_INIT(flush_resource);
5338    CTX_INIT(generate_mipmap);
5339    CTX_INIT(render_condition);
5340    CTX_INIT(create_query);
5341    CTX_INIT(create_batch_query);
5342    CTX_INIT(destroy_query);
5343    CTX_INIT(begin_query);
5344    CTX_INIT(end_query);
5345    CTX_INIT(get_query_result);
5346    CTX_INIT(get_query_result_resource);
5347    CTX_INIT(set_active_query_state);
5348    CTX_INIT(create_blend_state);
5349    CTX_INIT(bind_blend_state);
5350    CTX_INIT(delete_blend_state);
5351    CTX_INIT(create_sampler_state);
5352    CTX_INIT(bind_sampler_states);
5353    CTX_INIT(delete_sampler_state);
5354    CTX_INIT(create_rasterizer_state);
5355    CTX_INIT(bind_rasterizer_state);
5356    CTX_INIT(delete_rasterizer_state);
5357    CTX_INIT(create_depth_stencil_alpha_state);
5358    CTX_INIT(bind_depth_stencil_alpha_state);
5359    CTX_INIT(delete_depth_stencil_alpha_state);
5360    CTX_INIT(link_shader);
5361    CTX_INIT(create_fs_state);
5362    CTX_INIT(bind_fs_state);
5363    CTX_INIT(delete_fs_state);
5364    CTX_INIT(create_vs_state);
5365    CTX_INIT(bind_vs_state);
5366    CTX_INIT(delete_vs_state);
5367    CTX_INIT(create_gs_state);
5368    CTX_INIT(bind_gs_state);
5369    CTX_INIT(delete_gs_state);
5370    CTX_INIT(create_tcs_state);
5371    CTX_INIT(bind_tcs_state);
5372    CTX_INIT(delete_tcs_state);
5373    CTX_INIT(create_tes_state);
5374    CTX_INIT(bind_tes_state);
5375    CTX_INIT(delete_tes_state);
5376    CTX_INIT(create_compute_state);
5377    CTX_INIT(bind_compute_state);
5378    CTX_INIT(delete_compute_state);
5379    CTX_INIT(create_vertex_elements_state);
5380    CTX_INIT(bind_vertex_elements_state);
5381    CTX_INIT(delete_vertex_elements_state);
5382    CTX_INIT(set_blend_color);
5383    CTX_INIT(set_stencil_ref);
5384    CTX_INIT(set_sample_mask);
5385    CTX_INIT(set_min_samples);
5386    CTX_INIT(set_clip_state);
5387    CTX_INIT(set_constant_buffer);
5388    CTX_INIT(set_inlinable_constants);
5389    CTX_INIT(set_framebuffer_state);
5390    CTX_INIT(set_polygon_stipple);
5391    CTX_INIT(set_sample_locations);
5392    CTX_INIT(set_scissor_states);
5393    CTX_INIT(set_viewport_states);
5394    CTX_INIT(set_window_rectangles);
5395    CTX_INIT(set_sampler_views);
5396    CTX_INIT(set_tess_state);
5397    CTX_INIT(set_patch_vertices);
5398    CTX_INIT(set_shader_buffers);
5399    CTX_INIT(set_shader_images);
5400    CTX_INIT(set_vertex_buffers);
5401    CTX_INIT(create_stream_output_target);
5402    CTX_INIT(stream_output_target_destroy);
5403    CTX_INIT(set_stream_output_targets);
5404    CTX_INIT(create_sampler_view);
5405    CTX_INIT(sampler_view_destroy);
5406    CTX_INIT(create_surface);
5407    CTX_INIT(surface_destroy);
5408    CTX_INIT(buffer_map);
5409    CTX_INIT(texture_map);
5410    CTX_INIT(transfer_flush_region);
5411    CTX_INIT(buffer_unmap);
5412    CTX_INIT(texture_unmap);
5413    CTX_INIT(buffer_subdata);
5414    CTX_INIT(texture_subdata);
5415    CTX_INIT(texture_barrier);
5416    CTX_INIT(memory_barrier);
5417    CTX_INIT(resource_commit);
5418    CTX_INIT(create_video_codec);
5419    CTX_INIT(create_video_buffer);
5420    CTX_INIT(set_compute_resources);
5421    CTX_INIT(set_global_binding);
5422    CTX_INIT(get_sample_position);
5423    CTX_INIT(invalidate_resource);
5424    CTX_INIT(get_device_reset_status);
5425    CTX_INIT(set_device_reset_callback);
5426    CTX_INIT(dump_debug_state);
5427    CTX_INIT(set_log_context);
5428    CTX_INIT(emit_string_marker);
5429    CTX_INIT(set_debug_callback);
5430    CTX_INIT(create_fence_fd);
5431    CTX_INIT(fence_server_sync);
5432    CTX_INIT(fence_server_signal);
5433    CTX_INIT(get_timestamp);
5434    CTX_INIT(create_texture_handle);
5435    CTX_INIT(delete_texture_handle);
5436    CTX_INIT(make_texture_handle_resident);
5437    CTX_INIT(create_image_handle);
5438    CTX_INIT(delete_image_handle);
5439    CTX_INIT(make_image_handle_resident);
5440    CTX_INIT(set_frontend_noop);
5441    CTX_INIT(init_intel_perf_query_info);
5442    CTX_INIT(get_intel_perf_query_info);
5443    CTX_INIT(get_intel_perf_query_counter_info);
5444    CTX_INIT(new_intel_perf_query_obj);
5445    CTX_INIT(begin_intel_perf_query);
5446    CTX_INIT(end_intel_perf_query);
5447    CTX_INIT(delete_intel_perf_query);
5448    CTX_INIT(wait_intel_perf_query);
5449    CTX_INIT(is_intel_perf_query_ready);
5450    CTX_INIT(get_intel_perf_query_data);
5451 #undef CTX_INIT
5452 
5453    if (out)
5454       *out = tc;
5455 
5456    tc_begin_next_buffer_list(tc);
5457    if (tc->options.parse_renderpass_info)
5458       tc_batch_increment_renderpass_info(tc, tc->next, false);
5459    return &tc->base;
5460 
5461 fail:
5462    tc_destroy(&tc->base);
5463    return NULL;
5464 }
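/* Usage sketch (driver-side; the "mydrv_*" names and the options struct are
 * hypothetical, and the parameter list is assumed to match the prototype in
 * u_threaded_context.h): a driver wraps its real context at creation time and
 * hands the wrapper back to the frontend, e.g.
 *
 *    struct pipe_context *ctx = mydrv_context_create(screen, priv, flags);
 *    return threaded_context_create(ctx, &mydrv_screen(screen)->transfer_pool,
 *                                   mydrv_replace_buffer_storage,
 *                                   &tc_options, NULL);
 */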
5465 
5466 void
5467 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
5468 {
5469    uint64_t total_ram;
5470    if (os_get_total_physical_memory(&total_ram)) {
5471       tc->bytes_mapped_limit = total_ram / divisor;
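      /* A 32-bit process has only 2-4 GiB of address space regardless of how
       * much physical RAM is installed, so clamp the limit well below that. */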
5472       if (sizeof(void*) == 4)
5473          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
5474    }
5475 }
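/* Usage note (the divisor is illustrative): a driver would typically call this
 * right after creating the threaded context, e.g.
 *
 *    threaded_context_init_bytes_mapped_limit(tc, 4);
 *
 * so that once roughly a quarter of system RAM is mapped through
 * not-yet-flushed transfers, tc starts flushing to keep memory usage bounded.
 */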
5476 
5477 const struct tc_renderpass_info *
5478 threaded_context_get_renderpass_info(struct threaded_context *tc)
5479 {
5480    assert(tc->renderpass_info && tc->options.parse_renderpass_info);
5481    struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
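   /* The info may be chained: if the renderpass continued past a batch
    * boundary, the rest of the data lives in a follow-up node linked through
    * ->next. Wait until each node is marked ready, then follow the chain to
    * its last node, which holds the complete information.
    */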
5482    while (1) {
5483       util_queue_fence_wait(&info->ready);
5484       if (!info->next)
5485          return &info->info;
5486       info = info->next;
5487    }
5488 }
5489