1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "compiler/shader_info.h"
36
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42
43 #if TC_DEBUG >= 2
44 #define tc_printf mesa_logi
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52
53 #define TC_SENTINEL 0x5ca1ab1e
54
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59 TC_NUM_CALLS,
60 };
61
62 #if TC_DEBUG >= 3
63 static const char *tc_call_names[] = {
64 #define CALL(name) #name,
65 #include "u_threaded_context_calls.h"
66 #undef CALL
67 };
68 #endif
69
70 typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);
71
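/* Table of functions that execute queued calls in the driver thread,
 * indexed by tc_call_id. tc_batch_execute dispatches through it; the table
 * itself is filled in elsewhere in this file.
 */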
72 static const tc_execute execute_func[TC_NUM_CALLS];
73
74 static void
75 tc_batch_check(UNUSED struct tc_batch *batch)
76 {
77 tc_assert(batch->sentinel == TC_SENTINEL);
78 tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
79 }
80
81 static void
82 tc_debug_check(struct threaded_context *tc)
83 {
84 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
85 tc_batch_check(&tc->batch_slots[i]);
86 tc_assert(tc->batch_slots[i].tc == tc);
87 }
88 }
89
90 static void
91 tc_set_driver_thread(struct threaded_context *tc)
92 {
93 #ifndef NDEBUG
94 tc->driver_thread = util_get_thread_id();
95 #endif
96 }
97
98 static void
99 tc_clear_driver_thread(struct threaded_context *tc)
100 {
101 #ifndef NDEBUG
102 memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
103 #endif
104 }
105
106 static void *
107 to_call_check(void *ptr, unsigned num_slots)
108 {
109 #if TC_DEBUG >= 1
110 struct tc_call_base *call = ptr;
111 tc_assert(call->num_slots == num_slots);
112 #endif
113 return ptr;
114 }
115 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
116
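/* Calls are stored in batches as arrays of 8-byte slots. call_size()
 * converts a call structure to its size in slots; call_size_with_slots()
 * also accounts for the variable-length slot[] array used by some calls.
 */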
117 #define size_to_slots(size) DIV_ROUND_UP(size, 8)
118 #define call_size(type) size_to_slots(sizeof(struct type))
119 #define call_size_with_slots(type, num_slots) size_to_slots( \
120 sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
121 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
122
123 /* Assign src to dst while dst is uninitialized. */
124 static inline void
125 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
126 {
127 *dst = src;
128 pipe_reference(NULL, &src->reference); /* only increment refcount */
129 }
130
131 /* Assign src to dst while dst is uninitialized. */
132 static inline void
133 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
134 struct pipe_vertex_state *src)
135 {
136 *dst = src;
137 pipe_reference(NULL, &src->reference); /* only increment refcount */
138 }
139
140 /* Unreference dst but don't touch the dst pointer. */
141 static inline void
142 tc_drop_resource_reference(struct pipe_resource *dst)
143 {
144 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
145 pipe_resource_destroy(dst);
146 }
147
148 /* Unreference dst but don't touch the dst pointer. */
149 static inline void
150 tc_drop_surface_reference(struct pipe_surface *dst)
151 {
152 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
153 dst->context->surface_destroy(dst->context, dst);
154 }
155
156 /* Unreference dst but don't touch the dst pointer. */
157 static inline void
158 tc_drop_sampler_view_reference(struct pipe_sampler_view *dst)
159 {
160 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
161 dst->context->sampler_view_destroy(dst->context, dst);
162 }
163
164 /* Unreference dst but don't touch the dst pointer. */
165 static inline void
166 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
167 {
168 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
169 dst->context->stream_output_target_destroy(dst->context, dst);
170 }
171
172 /**
173 * Subtract the given number of references.
174 */
175 static inline void
176 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
177 {
178 int count = p_atomic_add_return(&dst->reference.count, -num_refs);
179
180 assert(count >= 0);
181 /* Underflows shouldn't happen, but let's be safe. */
182 if (count <= 0)
183 dst->screen->vertex_state_destroy(dst->screen, dst);
184 }
185
186 /* We don't want to read or write min_index and max_index, because
187 * they shouldn't be needed by drivers at this point.
188 */
189 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
190 offsetof(struct pipe_draw_info, min_index)
191
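/* Execute all queued calls of a batch on the real driver context. This is
 * normally run as a util_queue job in the driver thread, but _tc_sync also
 * calls it directly for unflushed batches.
 */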
192 static void
193 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
194 {
195 struct tc_batch *batch = job;
196 struct pipe_context *pipe = batch->tc->pipe;
197 uint64_t *last = &batch->slots[batch->num_total_slots];
198
199 tc_batch_check(batch);
200 tc_set_driver_thread(batch->tc);
201
202 assert(!batch->token);
203
204 for (uint64_t *iter = batch->slots; iter != last;) {
205 struct tc_call_base *call = (struct tc_call_base *)iter;
206
207 tc_assert(call->sentinel == TC_SENTINEL);
208
209 #if TC_DEBUG >= 3
210 tc_printf("CALL: %s", tc_call_names[call->call_id]);
211 #endif
212
213 iter += execute_func[call->call_id](pipe, call, last);
214 }
215
216 /* Add the fence to the list of fences for the driver to signal at the next
217 * flush, which we use for tracking which buffers are referenced by
218 * an unflushed command buffer.
219 */
220 struct threaded_context *tc = batch->tc;
221 struct util_queue_fence *fence =
222 &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
223
224 if (tc->options.driver_calls_flush_notify) {
225 tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
226
227 /* Since our buffer lists are chained as a ring, we need to flush
228 * the context twice as we go around the ring to make the driver signal
229 * the buffer list fences, so that the producer thread can reuse the buffer
230 * list structures for the next batches without waiting.
231 */
232 unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
233 if (batch->buffer_list_index % half_ring == half_ring - 1)
234 pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
235 } else {
236 util_queue_fence_signal(fence);
237 }
238
239 tc_clear_driver_thread(batch->tc);
240 tc_batch_check(batch);
241 batch->num_total_slots = 0;
242 }
243
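/* Advance to the next buffer list in the ring and reset it for the new,
 * empty batch.
 */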
244 static void
245 tc_begin_next_buffer_list(struct threaded_context *tc)
246 {
247 tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
248
249 tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
250
251 /* Clear the buffer list in the new empty batch. */
252 struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
253 assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
254 util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
255 BITSET_ZERO(buf_list->buffer_list);
256
257 tc->add_all_gfx_bindings_to_buffer_list = true;
258 tc->add_all_compute_bindings_to_buffer_list = true;
259 }
260
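/* Submit the current batch to the driver thread for asynchronous execution
 * and advance to the next batch slot and buffer list.
 */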
261 static void
262 tc_batch_flush(struct threaded_context *tc)
263 {
264 struct tc_batch *next = &tc->batch_slots[tc->next];
265
266 tc_assert(next->num_total_slots != 0);
267 tc_batch_check(next);
268 tc_debug_check(tc);
269 tc->bytes_mapped_estimate = 0;
270 p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
271
272 if (next->token) {
273 next->token->tc = NULL;
274 tc_unflushed_batch_token_reference(&next->token, NULL);
275 }
276
277 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
278 NULL, 0);
279 tc->last = tc->next;
280 tc->next = (tc->next + 1) % TC_MAX_BATCHES;
281 tc_begin_next_buffer_list(tc);
282 }
283
284 /* This is the function that adds variable-sized calls into the current
285 * batch. It also flushes the batch if there is not enough space left in it.
286 * All other higher-level "add" functions use it.
287 */
288 static void *
289 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
290 unsigned num_slots)
291 {
292 struct tc_batch *next = &tc->batch_slots[tc->next];
293 assert(num_slots <= TC_SLOTS_PER_BATCH);
294 tc_debug_check(tc);
295
296 if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
297 tc_batch_flush(tc);
298 next = &tc->batch_slots[tc->next];
299 tc_assert(next->num_total_slots == 0);
300 }
301
302 tc_assert(util_queue_fence_is_signalled(&next->fence));
303
304 struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
305 next->num_total_slots += num_slots;
306
307 #if !defined(NDEBUG) && TC_DEBUG >= 1
308 call->sentinel = TC_SENTINEL;
309 #endif
310 call->call_id = id;
311 call->num_slots = num_slots;
312
313 #if TC_DEBUG >= 3
314 tc_printf("ENQUEUE: %s", tc_call_names[id]);
315 #endif
316
317 tc_debug_check(tc);
318 return call;
319 }
320
321 #define tc_add_call(tc, execute, type) \
322 ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
323
324 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
325 ((struct type*)tc_add_sized_call(tc, execute, \
326 call_size_with_slots(type, num_slots)))
327
328 static bool
329 tc_is_sync(struct threaded_context *tc)
330 {
331 struct tc_batch *last = &tc->batch_slots[tc->last];
332 struct tc_batch *next = &tc->batch_slots[tc->next];
333
334 return util_queue_fence_is_signalled(&last->fence) &&
335 !next->num_total_slots;
336 }
337
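/* Synchronize with the driver thread: wait for the last submitted batch to
 * finish and execute any still-unflushed calls directly in this thread.
 */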
338 static void
339 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
340 {
341 struct tc_batch *last = &tc->batch_slots[tc->last];
342 struct tc_batch *next = &tc->batch_slots[tc->next];
343 bool synced = false;
344
345 tc_debug_check(tc);
346
347 /* Only wait for queued calls... */
348 if (!util_queue_fence_is_signalled(&last->fence)) {
349 util_queue_fence_wait(&last->fence);
350 synced = true;
351 }
352
353 tc_debug_check(tc);
354
355 if (next->token) {
356 next->token->tc = NULL;
357 tc_unflushed_batch_token_reference(&next->token, NULL);
358 }
359
360 /* ... and execute unflushed calls directly. */
361 if (next->num_total_slots) {
362 p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
363 tc->bytes_mapped_estimate = 0;
364 tc_batch_execute(next, NULL, 0);
365 tc_begin_next_buffer_list(tc);
366 synced = true;
367 }
368
369 if (synced) {
370 p_atomic_inc(&tc->num_syncs);
371
372 if (tc_strcmp(func, "tc_destroy") != 0) {
373 tc_printf("sync %s %s", func, info);
374 }
375 }
376
377 tc_debug_check(tc);
378 }
379
380 #define tc_sync(tc) _tc_sync(tc, "", __func__)
381 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
382
383 /**
384 * Call this from fence_finish for same-context fence waits of deferred fences
385 * that haven't been flushed yet.
386 *
387 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
388 * i.e., the wrapped one.
389 */
390 void
391 threaded_context_flush(struct pipe_context *_pipe,
392 struct tc_unflushed_batch_token *token,
393 bool prefer_async)
394 {
395 struct threaded_context *tc = threaded_context(_pipe);
396
397 /* This is called from the gallium frontend / application thread. */
398 if (token->tc && token->tc == tc) {
399 struct tc_batch *last = &tc->batch_slots[tc->last];
400
401 /* Prefer to do the flush in the driver thread if it is already
402 * running. That should be better for cache locality.
403 */
404 if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
405 tc_batch_flush(tc);
406 else
407 tc_sync(token->tc);
408 }
409 }
410
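/* Record that a buffer may be referenced by calls queued in the current
 * batch, so that tc_is_buffer_busy treats it as busy until the driver has
 * flushed that batch.
 */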
411 static void
412 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
413 {
414 uint32_t id = threaded_resource(buf)->buffer_id_unique;
415 BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
416 }
417
418 /* Set a buffer binding and add it to the buffer list. */
419 static void
420 tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
421 {
422 uint32_t id = threaded_resource(buf)->buffer_id_unique;
423 *binding = id;
424 BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
425 }
426
427 /* Reset a buffer binding. */
428 static void
429 tc_unbind_buffer(uint32_t *binding)
430 {
431 *binding = 0;
432 }
433
434 /* Reset a range of buffer binding slots. */
435 static void
436 tc_unbind_buffers(uint32_t *binding, unsigned count)
437 {
438 if (count)
439 memset(binding, 0, sizeof(*binding) * count);
440 }
441
442 static void
443 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
444 unsigned count)
445 {
446 for (unsigned i = 0; i < count; i++) {
447 if (bindings[i])
448 BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
449 }
450 }
451
452 static unsigned
453 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
454 unsigned count)
455 {
456 unsigned rebind_count = 0;
457
458 for (unsigned i = 0; i < count; i++) {
459 if (bindings[i] == old_id) {
460 bindings[i] = new_id;
461 rebind_count++;
462 }
463 }
464 return rebind_count;
465 }
466
467 static void
468 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
469 BITSET_WORD *buffer_list,
470 enum pipe_shader_type shader)
471 {
472 tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
473 tc->max_const_buffers);
474 if (tc->seen_shader_buffers[shader]) {
475 tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
476 tc->max_shader_buffers);
477 }
478 if (tc->seen_image_buffers[shader]) {
479 tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
480 tc->max_images);
481 }
482 if (tc->seen_sampler_buffers[shader]) {
483 tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
484 tc->max_samplers);
485 }
486 }
487
488 static unsigned
489 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
490 uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
491 {
492 unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
493
494 ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
495 tc->max_const_buffers);
496 if (ubo)
497 *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
498 if (tc->seen_shader_buffers[shader]) {
499 ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
500 tc->max_shader_buffers);
501 if (ssbo)
502 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
503 }
504 if (tc->seen_image_buffers[shader]) {
505 img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
506 tc->max_images);
507 if (img)
508 *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
509 }
510 if (tc->seen_sampler_buffers[shader]) {
511 sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
512 tc->max_samplers);
513 if (sampler)
514 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
515 }
516 return ubo + ssbo + img + sampler;
517 }
518
519 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
520 * This is called by the first draw call in a batch when we want to inherit
521 * all bindings set by the previous batch.
522 */
523 static void
524 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
525 {
526 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
527
528 tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
529 if (tc->seen_streamout_buffers)
530 tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
531
532 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
533 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
534
535 if (tc->seen_tcs)
536 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
537 if (tc->seen_tes)
538 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
539 if (tc->seen_gs)
540 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
541
542 tc->add_all_gfx_bindings_to_buffer_list = false;
543 }
544
545 /* Add all bound buffers used by compute to the buffer list.
546 * This is called by the first compute call in a batch when we want to inherit
547 * all bindings set by the previous batch.
548 */
549 static void
550 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
551 {
552 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
553
554 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
555 tc->add_all_compute_bindings_to_buffer_list = false;
556 }
557
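/* Rewrite every tracked binding slot equal to old_id to new_id and add
 * new_id to the current buffer list. The return value is nonzero if any
 * binding was updated.
 */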
558 static unsigned
559 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
560 {
561 unsigned vbo = 0, so = 0;
562
563 vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
564 tc->max_vertex_buffers);
565 if (vbo)
566 *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
567
568 if (tc->seen_streamout_buffers) {
569 so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
570 PIPE_MAX_SO_BUFFERS);
571 if (so)
572 *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
573 }
574 unsigned rebound = vbo + so;
575
576 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
577 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
578
579 if (tc->seen_tcs)
580 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
581 if (tc->seen_tes)
582 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
583 if (tc->seen_gs)
584 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
585
586 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
587
588 if (rebound)
589 BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
590 return rebound;
591 }
592
593 static bool
594 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
595 {
596 while (binding_mask) {
597 if (bindings[u_bit_scan(&binding_mask)] == id)
598 return true;
599 }
600 return false;
601 }
602
603 static bool
604 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
605 enum pipe_shader_type shader)
606 {
607 if (tc->seen_shader_buffers[shader] &&
608 tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
609 tc->shader_buffers_writeable_mask[shader]))
610 return true;
611
612 if (tc->seen_image_buffers[shader] &&
613 tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
614 tc->image_buffers_writeable_mask[shader]))
615 return true;
616
617 return false;
618 }
619
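/* Return true if the buffer is bound anywhere it can be written:
 * streamout, or a writable shader buffer / image slot of any shader stage
 * seen so far.
 */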
620 static bool
621 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
622 {
623 if (tc->seen_streamout_buffers &&
624 tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
625 BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
626 return true;
627
628 if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
629 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
630 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
631 return true;
632
633 if (tc->seen_tcs &&
634 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
635 return true;
636
637 if (tc->seen_tes &&
638 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
639 return true;
640
641 if (tc->seen_gs &&
642 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
643 return true;
644
645 return false;
646 }
647
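/* Conservatively determine whether a buffer may still be in use. It is
 * considered busy if any unflushed batch references it; otherwise the
 * driver's is_resource_busy callback decides (no callback means "busy").
 */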
648 static bool
649 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
650 unsigned map_usage)
651 {
652 if (!tc->options.is_resource_busy)
653 return true;
654
655 uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
656
657 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
658 struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
659
660 /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
661 * then the buffer is considered busy. */
662 if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
663 BITSET_TEST(buf_list->buffer_list, id_hash))
664 return true;
665 }
666
667 /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
668 * this buffer is busy or not. */
669 return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
670 }
671
672 void
673 threaded_resource_init(struct pipe_resource *res)
674 {
675 struct threaded_resource *tres = threaded_resource(res);
676
677 tres->latest = &tres->b;
678 util_range_init(&tres->valid_buffer_range);
679 tres->is_shared = false;
680 tres->is_user_ptr = false;
681 tres->buffer_id_unique = 0;
682 tres->pending_staging_uploads = 0;
683 util_range_init(&tres->pending_staging_uploads_range);
684 }
685
686 void
687 threaded_resource_deinit(struct pipe_resource *res)
688 {
689 struct threaded_resource *tres = threaded_resource(res);
690
691 if (tres->latest != &tres->b)
692 pipe_resource_reference(&tres->latest, NULL);
693 util_range_destroy(&tres->valid_buffer_range);
694 util_range_destroy(&tres->pending_staging_uploads_range);
695 }
696
697 struct pipe_context *
698 threaded_context_unwrap_sync(struct pipe_context *pipe)
699 {
700 if (!pipe || !pipe->priv)
701 return pipe;
702
703 tc_sync(threaded_context(pipe));
704 return (struct pipe_context*)pipe->priv;
705 }
706
707
708 /********************************************************************
709 * simple functions
710 */
711
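/* TC_FUNC1 generates the call structure, the tc_call_* execute function and
 * the tc_* context entrypoint for pipe_context functions that take a single
 * state argument.
 */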
712 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
713 struct tc_call_##func { \
714 struct tc_call_base base; \
715 type state; \
716 }; \
717 \
718 static uint16_t \
719 tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
720 { \
721 pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
722 return call_size(tc_call_##func); \
723 } \
724 \
725 static void \
726 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
727 { \
728 struct threaded_context *tc = threaded_context(_pipe); \
729 struct tc_call_##func *p = (struct tc_call_##func*) \
730 tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
731 p->state = deref(param); \
732 __VA_ARGS__; \
733 }
734
735 TC_FUNC1(set_active_query_state, , bool, , )
736
737 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
738 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
739 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
740 TC_FUNC1(set_sample_mask, , unsigned, , )
741 TC_FUNC1(set_min_samples, , unsigned, , )
742 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
743
744 TC_FUNC1(texture_barrier, , unsigned, , )
745 TC_FUNC1(memory_barrier, , unsigned, , )
746 TC_FUNC1(delete_texture_handle, , uint64_t, , )
747 TC_FUNC1(delete_image_handle, , uint64_t, , )
748 TC_FUNC1(set_frontend_noop, , bool, , )
749
750
751 /********************************************************************
752 * queries
753 */
754
755 static struct pipe_query *
756 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
757 unsigned index)
758 {
759 struct threaded_context *tc = threaded_context(_pipe);
760 struct pipe_context *pipe = tc->pipe;
761
762 return pipe->create_query(pipe, query_type, index);
763 }
764
765 static struct pipe_query *
766 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
767 unsigned *query_types)
768 {
769 struct threaded_context *tc = threaded_context(_pipe);
770 struct pipe_context *pipe = tc->pipe;
771
772 return pipe->create_batch_query(pipe, num_queries, query_types);
773 }
774
775 struct tc_query_call {
776 struct tc_call_base base;
777 struct pipe_query *query;
778 };
779
780 static uint16_t
781 tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
782 {
783 struct pipe_query *query = to_call(call, tc_query_call)->query;
784 struct threaded_query *tq = threaded_query(query);
785
786 if (list_is_linked(&tq->head_unflushed))
787 list_del(&tq->head_unflushed);
788
789 pipe->destroy_query(pipe, query);
790 return call_size(tc_query_call);
791 }
792
793 static void
794 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
795 {
796 struct threaded_context *tc = threaded_context(_pipe);
797
798 tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
799 }
800
801 static uint16_t
802 tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
803 {
804 pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
805 return call_size(tc_query_call);
806 }
807
808 static bool
809 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
810 {
811 struct threaded_context *tc = threaded_context(_pipe);
812
813 tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
814 return true; /* we don't care about the return value for this call */
815 }
816
817 struct tc_end_query_call {
818 struct tc_call_base base;
819 struct threaded_context *tc;
820 struct pipe_query *query;
821 };
822
823 static uint16_t
824 tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
825 {
826 struct tc_end_query_call *p = to_call(call, tc_end_query_call);
827 struct threaded_query *tq = threaded_query(p->query);
828
829 if (!list_is_linked(&tq->head_unflushed))
830 list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
831
832 pipe->end_query(pipe, p->query);
833 return call_size(tc_end_query_call);
834 }
835
836 static bool
837 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
838 {
839 struct threaded_context *tc = threaded_context(_pipe);
840 struct threaded_query *tq = threaded_query(query);
841 struct tc_end_query_call *call =
842 tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
843
844 call->tc = tc;
845 call->query = query;
846
847 tq->flushed = false;
848
849 return true; /* we don't care about the return value for this call */
850 }
851
852 static bool
853 tc_get_query_result(struct pipe_context *_pipe,
854 struct pipe_query *query, bool wait,
855 union pipe_query_result *result)
856 {
857 struct threaded_context *tc = threaded_context(_pipe);
858 struct threaded_query *tq = threaded_query(query);
859 struct pipe_context *pipe = tc->pipe;
860 bool flushed = tq->flushed;
861
862 if (!flushed) {
863 tc_sync_msg(tc, wait ? "wait" : "nowait");
864 tc_set_driver_thread(tc);
865 }
866
867 bool success = pipe->get_query_result(pipe, query, wait, result);
868
869 if (!flushed)
870 tc_clear_driver_thread(tc);
871
872 if (success) {
873 tq->flushed = true;
874 if (list_is_linked(&tq->head_unflushed)) {
875 /* This is safe because it can only happen after we sync'd. */
876 list_del(&tq->head_unflushed);
877 }
878 }
879 return success;
880 }
881
882 struct tc_query_result_resource {
883 struct tc_call_base base;
884 bool wait;
885 enum pipe_query_value_type result_type:8;
886 int8_t index; /* it can be -1 */
887 unsigned offset;
888 struct pipe_query *query;
889 struct pipe_resource *resource;
890 };
891
892 static uint16_t
893 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
894 {
895 struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
896
897 pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
898 p->index, p->resource, p->offset);
899 tc_drop_resource_reference(p->resource);
900 return call_size(tc_query_result_resource);
901 }
902
903 static void
904 tc_get_query_result_resource(struct pipe_context *_pipe,
905 struct pipe_query *query, bool wait,
906 enum pipe_query_value_type result_type, int index,
907 struct pipe_resource *resource, unsigned offset)
908 {
909 struct threaded_context *tc = threaded_context(_pipe);
910 struct tc_query_result_resource *p =
911 tc_add_call(tc, TC_CALL_get_query_result_resource,
912 tc_query_result_resource);
913
914 p->query = query;
915 p->wait = wait;
916 p->result_type = result_type;
917 p->index = index;
918 tc_set_resource_reference(&p->resource, resource);
919 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
920 p->offset = offset;
921 }
922
923 struct tc_render_condition {
924 struct tc_call_base base;
925 bool condition;
926 unsigned mode;
927 struct pipe_query *query;
928 };
929
930 static uint16_t
931 tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
932 {
933 struct tc_render_condition *p = to_call(call, tc_render_condition);
934 pipe->render_condition(pipe, p->query, p->condition, p->mode);
935 return call_size(tc_render_condition);
936 }
937
938 static void
939 tc_render_condition(struct pipe_context *_pipe,
940 struct pipe_query *query, bool condition,
941 enum pipe_render_cond_flag mode)
942 {
943 struct threaded_context *tc = threaded_context(_pipe);
944 struct tc_render_condition *p =
945 tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
946
947 p->query = query;
948 p->condition = condition;
949 p->mode = mode;
950 }
951
952
953 /********************************************************************
954 * constant (immutable) states
955 */
956
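/* CSO creation is forwarded to the driver synchronously, while the bind and
 * delete entrypoints are queued as calls via TC_FUNC1.
 */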
957 #define TC_CSO_CREATE(name, sname) \
958 static void * \
959 tc_create_##name##_state(struct pipe_context *_pipe, \
960 const struct pipe_##sname##_state *state) \
961 { \
962 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
963 return pipe->create_##name##_state(pipe, state); \
964 }
965
966 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
967 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
968
969 #define TC_CSO(name, sname, ...) \
970 TC_CSO_CREATE(name, sname) \
971 TC_CSO_BIND(name, ##__VA_ARGS__) \
972 TC_CSO_DELETE(name)
973
974 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
975 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
976 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
977
978 TC_CSO_WHOLE(blend)
979 TC_CSO_WHOLE(rasterizer)
980 TC_CSO_WHOLE(depth_stencil_alpha)
981 TC_CSO_WHOLE(compute)
982 TC_CSO_SHADER(fs)
983 TC_CSO_SHADER(vs)
984 TC_CSO_SHADER_TRACK(gs)
985 TC_CSO_SHADER_TRACK(tcs)
986 TC_CSO_SHADER_TRACK(tes)
987 TC_CSO_CREATE(sampler, sampler)
988 TC_CSO_DELETE(sampler)
989 TC_CSO_BIND(vertex_elements)
990 TC_CSO_DELETE(vertex_elements)
991
992 static void *
993 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
994 const struct pipe_vertex_element *elems)
995 {
996 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
997
998 return pipe->create_vertex_elements_state(pipe, count, elems);
999 }
1000
1001 struct tc_sampler_states {
1002 struct tc_call_base base;
1003 ubyte shader, start, count;
1004 void *slot[0]; /* more will be allocated if needed */
1005 };
1006
1007 static uint16_t
1008 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
1009 {
1010 struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1011
1012 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1013 return p->base.num_slots;
1014 }
1015
1016 static void
1017 tc_bind_sampler_states(struct pipe_context *_pipe,
1018 enum pipe_shader_type shader,
1019 unsigned start, unsigned count, void **states)
1020 {
1021 if (!count)
1022 return;
1023
1024 struct threaded_context *tc = threaded_context(_pipe);
1025 struct tc_sampler_states *p =
1026 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1027
1028 p->shader = shader;
1029 p->start = start;
1030 p->count = count;
1031 memcpy(p->slot, states, count * sizeof(states[0]));
1032 }
1033
1034
1035 /********************************************************************
1036 * immediate states
1037 */
1038
1039 struct tc_framebuffer {
1040 struct tc_call_base base;
1041 struct pipe_framebuffer_state state;
1042 };
1043
1044 static uint16_t
1045 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
1046 {
1047 struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1048
1049 pipe->set_framebuffer_state(pipe, p);
1050
1051 unsigned nr_cbufs = p->nr_cbufs;
1052 for (unsigned i = 0; i < nr_cbufs; i++)
1053 tc_drop_surface_reference(p->cbufs[i]);
1054 tc_drop_surface_reference(p->zsbuf);
1055 return call_size(tc_framebuffer);
1056 }
1057
1058 static void
1059 tc_set_framebuffer_state(struct pipe_context *_pipe,
1060 const struct pipe_framebuffer_state *fb)
1061 {
1062 struct threaded_context *tc = threaded_context(_pipe);
1063 struct tc_framebuffer *p =
1064 tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1065 unsigned nr_cbufs = fb->nr_cbufs;
1066
1067 p->state.width = fb->width;
1068 p->state.height = fb->height;
1069 p->state.samples = fb->samples;
1070 p->state.layers = fb->layers;
1071 p->state.nr_cbufs = nr_cbufs;
1072
1073 for (unsigned i = 0; i < nr_cbufs; i++) {
1074 p->state.cbufs[i] = NULL;
1075 pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1076 }
1077 p->state.zsbuf = NULL;
1078 pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1079 }
1080
1081 struct tc_tess_state {
1082 struct tc_call_base base;
1083 float state[6];
1084 };
1085
1086 static uint16_t
1087 tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
1088 {
1089 float *p = to_call(call, tc_tess_state)->state;
1090
1091 pipe->set_tess_state(pipe, p, p + 4);
1092 return call_size(tc_tess_state);
1093 }
1094
1095 static void
1096 tc_set_tess_state(struct pipe_context *_pipe,
1097 const float default_outer_level[4],
1098 const float default_inner_level[2])
1099 {
1100 struct threaded_context *tc = threaded_context(_pipe);
1101 float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1102
1103 memcpy(p, default_outer_level, 4 * sizeof(float));
1104 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1105 }
1106
1107 struct tc_patch_vertices {
1108 struct tc_call_base base;
1109 ubyte patch_vertices;
1110 };
1111
1112 static uint16_t
1113 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
1114 {
1115 uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1116
1117 pipe->set_patch_vertices(pipe, patch_vertices);
1118 return call_size(tc_patch_vertices);
1119 }
1120
1121 static void
1122 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1123 {
1124 struct threaded_context *tc = threaded_context(_pipe);
1125
1126 tc_add_call(tc, TC_CALL_set_patch_vertices,
1127 tc_patch_vertices)->patch_vertices = patch_vertices;
1128 }
1129
1130 struct tc_constant_buffer_base {
1131 struct tc_call_base base;
1132 ubyte shader, index;
1133 bool is_null;
1134 };
1135
1136 struct tc_constant_buffer {
1137 struct tc_constant_buffer_base base;
1138 struct pipe_constant_buffer cb;
1139 };
1140
1141 static uint16_t
1142 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1143 {
1144 struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1145
1146 if (unlikely(p->base.is_null)) {
1147 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1148 return call_size(tc_constant_buffer_base);
1149 }
1150
1151 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1152 return call_size(tc_constant_buffer);
1153 }
1154
1155 static void
1156 tc_set_constant_buffer(struct pipe_context *_pipe,
1157 enum pipe_shader_type shader, uint index,
1158 bool take_ownership,
1159 const struct pipe_constant_buffer *cb)
1160 {
1161 struct threaded_context *tc = threaded_context(_pipe);
1162
1163 if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1164 struct tc_constant_buffer_base *p =
1165 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1166 p->shader = shader;
1167 p->index = index;
1168 p->is_null = true;
1169 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1170 return;
1171 }
1172
1173 struct pipe_resource *buffer;
1174 unsigned offset;
1175
1176 if (cb->user_buffer) {
1177 /* This must be done before adding set_constant_buffer, because the upload
1178 * could generate e.g. transfer_unmap and flush a partially-initialized
1179 * set_constant_buffer call to the driver if it were done afterwards.
1180 */
1181 buffer = NULL;
1182 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1183 tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1184 u_upload_unmap(tc->base.const_uploader);
1185 take_ownership = true;
1186 } else {
1187 buffer = cb->buffer;
1188 offset = cb->buffer_offset;
1189 }
1190
1191 struct tc_constant_buffer *p =
1192 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1193 p->base.shader = shader;
1194 p->base.index = index;
1195 p->base.is_null = false;
1196 p->cb.user_buffer = NULL;
1197 p->cb.buffer_offset = offset;
1198 p->cb.buffer_size = cb->buffer_size;
1199
1200 if (take_ownership)
1201 p->cb.buffer = buffer;
1202 else
1203 tc_set_resource_reference(&p->cb.buffer, buffer);
1204
1205 if (buffer) {
1206 tc_bind_buffer(&tc->const_buffers[shader][index],
1207 &tc->buffer_lists[tc->next_buf_list], buffer);
1208 } else {
1209 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1210 }
1211 }
1212
1213 struct tc_inlinable_constants {
1214 struct tc_call_base base;
1215 ubyte shader;
1216 ubyte num_values;
1217 uint32_t values[MAX_INLINABLE_UNIFORMS];
1218 };
1219
1220 static uint16_t
1221 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1222 {
1223 struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1224
1225 pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1226 return call_size(tc_inlinable_constants);
1227 }
1228
1229 static void
1230 tc_set_inlinable_constants(struct pipe_context *_pipe,
1231 enum pipe_shader_type shader,
1232 uint num_values, uint32_t *values)
1233 {
1234 struct threaded_context *tc = threaded_context(_pipe);
1235 struct tc_inlinable_constants *p =
1236 tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1237 p->shader = shader;
1238 p->num_values = num_values;
1239 memcpy(p->values, values, num_values * 4);
1240 }
1241
1242 struct tc_sample_locations {
1243 struct tc_call_base base;
1244 uint16_t size;
1245 uint8_t slot[0];
1246 };
1247
1248
1249 static uint16_t
1250 tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1251 {
1252 struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1253
1254 pipe->set_sample_locations(pipe, p->size, p->slot);
1255 return p->base.num_slots;
1256 }
1257
1258 static void
1259 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1260 {
1261 struct threaded_context *tc = threaded_context(_pipe);
1262 struct tc_sample_locations *p =
1263 tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1264 tc_sample_locations, size);
1265
1266 p->size = size;
1267 memcpy(p->slot, locations, size);
1268 }
1269
1270 struct tc_scissors {
1271 struct tc_call_base base;
1272 ubyte start, count;
1273 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1274 };
1275
1276 static uint16_t
1277 tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1278 {
1279 struct tc_scissors *p = (struct tc_scissors *)call;
1280
1281 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1282 return p->base.num_slots;
1283 }
1284
1285 static void
1286 tc_set_scissor_states(struct pipe_context *_pipe,
1287 unsigned start, unsigned count,
1288 const struct pipe_scissor_state *states)
1289 {
1290 struct threaded_context *tc = threaded_context(_pipe);
1291 struct tc_scissors *p =
1292 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1293
1294 p->start = start;
1295 p->count = count;
1296 memcpy(&p->slot, states, count * sizeof(states[0]));
1297 }
1298
1299 struct tc_viewports {
1300 struct tc_call_base base;
1301 ubyte start, count;
1302 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1303 };
1304
1305 static uint16_t
1306 tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1307 {
1308 struct tc_viewports *p = (struct tc_viewports *)call;
1309
1310 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1311 return p->base.num_slots;
1312 }
1313
1314 static void
1315 tc_set_viewport_states(struct pipe_context *_pipe,
1316 unsigned start, unsigned count,
1317 const struct pipe_viewport_state *states)
1318 {
1319 if (!count)
1320 return;
1321
1322 struct threaded_context *tc = threaded_context(_pipe);
1323 struct tc_viewports *p =
1324 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1325
1326 p->start = start;
1327 p->count = count;
1328 memcpy(&p->slot, states, count * sizeof(states[0]));
1329 }
1330
1331 struct tc_window_rects {
1332 struct tc_call_base base;
1333 bool include;
1334 ubyte count;
1335 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1336 };
1337
1338 static uint16_t
1339 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1340 {
1341 struct tc_window_rects *p = (struct tc_window_rects *)call;
1342
1343 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1344 return p->base.num_slots;
1345 }
1346
1347 static void
1348 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1349 unsigned count,
1350 const struct pipe_scissor_state *rects)
1351 {
1352 struct threaded_context *tc = threaded_context(_pipe);
1353 struct tc_window_rects *p =
1354 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1355
1356 p->include = include;
1357 p->count = count;
1358 memcpy(p->slot, rects, count * sizeof(rects[0]));
1359 }
1360
1361 struct tc_sampler_views {
1362 struct tc_call_base base;
1363 ubyte shader, start, count, unbind_num_trailing_slots;
1364 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1365 };
1366
1367 static uint16_t
1368 tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1369 {
1370 struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1371
1372 pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1373 p->unbind_num_trailing_slots, true, p->slot);
1374 return p->base.num_slots;
1375 }
1376
1377 static void
1378 tc_set_sampler_views(struct pipe_context *_pipe,
1379 enum pipe_shader_type shader,
1380 unsigned start, unsigned count,
1381 unsigned unbind_num_trailing_slots, bool take_ownership,
1382 struct pipe_sampler_view **views)
1383 {
1384 if (!count && !unbind_num_trailing_slots)
1385 return;
1386
1387 struct threaded_context *tc = threaded_context(_pipe);
1388 struct tc_sampler_views *p =
1389 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1390 views ? count : 0);
1391
1392 p->shader = shader;
1393 p->start = start;
1394
1395 if (views) {
1396 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1397
1398 p->count = count;
1399 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1400
1401 if (take_ownership) {
1402 memcpy(p->slot, views, sizeof(*views) * count);
1403
1404 for (unsigned i = 0; i < count; i++) {
1405 if (views[i] && views[i]->target == PIPE_BUFFER) {
1406 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1407 views[i]->texture);
1408 } else {
1409 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1410 }
1411 }
1412 } else {
1413 for (unsigned i = 0; i < count; i++) {
1414 p->slot[i] = NULL;
1415 pipe_sampler_view_reference(&p->slot[i], views[i]);
1416
1417 if (views[i] && views[i]->target == PIPE_BUFFER) {
1418 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1419 views[i]->texture);
1420 } else {
1421 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1422 }
1423 }
1424 }
1425
1426 tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1427 unbind_num_trailing_slots);
1428 tc->seen_sampler_buffers[shader] = true;
1429 } else {
1430 p->count = 0;
1431 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1432
1433 tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1434 count + unbind_num_trailing_slots);
1435 }
1436 }
1437
1438 struct tc_shader_images {
1439 struct tc_call_base base;
1440 ubyte shader, start, count;
1441 ubyte unbind_num_trailing_slots;
1442 struct pipe_image_view slot[0]; /* more will be allocated if needed */
1443 };
1444
1445 static uint16_t
1446 tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1447 {
1448 struct tc_shader_images *p = (struct tc_shader_images *)call;
1449 unsigned count = p->count;
1450
1451 if (!p->count) {
1452 pipe->set_shader_images(pipe, p->shader, p->start, 0,
1453 p->unbind_num_trailing_slots, NULL);
1454 return call_size(tc_shader_images);
1455 }
1456
1457 pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1458 p->unbind_num_trailing_slots, p->slot);
1459
1460 for (unsigned i = 0; i < count; i++)
1461 tc_drop_resource_reference(p->slot[i].resource);
1462
1463 return p->base.num_slots;
1464 }
1465
1466 static void
1467 tc_set_shader_images(struct pipe_context *_pipe,
1468 enum pipe_shader_type shader,
1469 unsigned start, unsigned count,
1470 unsigned unbind_num_trailing_slots,
1471 const struct pipe_image_view *images)
1472 {
1473 if (!count && !unbind_num_trailing_slots)
1474 return;
1475
1476 struct threaded_context *tc = threaded_context(_pipe);
1477 struct tc_shader_images *p =
1478 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1479 images ? count : 0);
1480 unsigned writable_buffers = 0;
1481
1482 p->shader = shader;
1483 p->start = start;
1484
1485 if (images) {
1486 p->count = count;
1487 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1488
1489 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1490
1491 for (unsigned i = 0; i < count; i++) {
1492 struct pipe_resource *resource = images[i].resource;
1493
1494 tc_set_resource_reference(&p->slot[i].resource, resource);
1495
1496 if (resource && resource->target == PIPE_BUFFER) {
1497 tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
1498
1499 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1500 struct threaded_resource *tres = threaded_resource(resource);
1501
1502 util_range_add(&tres->b, &tres->valid_buffer_range,
1503 images[i].u.buf.offset,
1504 images[i].u.buf.offset + images[i].u.buf.size);
1505 writable_buffers |= BITFIELD_BIT(start + i);
1506 }
1507 } else {
1508 tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1509 }
1510 }
1511 memcpy(p->slot, images, count * sizeof(images[0]));
1512
1513 tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1514 unbind_num_trailing_slots);
1515 tc->seen_image_buffers[shader] = true;
1516 } else {
1517 p->count = 0;
1518 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1519
1520 tc_unbind_buffers(&tc->image_buffers[shader][start],
1521 count + unbind_num_trailing_slots);
1522 }
1523
1524 tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1525 tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1526 }
1527
1528 struct tc_shader_buffers {
1529 struct tc_call_base base;
1530 ubyte shader, start, count;
1531 bool unbind;
1532 unsigned writable_bitmask;
1533 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1534 };
1535
1536 static uint16_t
1537 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1538 {
1539 struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1540 unsigned count = p->count;
1541
1542 if (p->unbind) {
1543 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1544 return call_size(tc_shader_buffers);
1545 }
1546
1547 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1548 p->writable_bitmask);
1549
1550 for (unsigned i = 0; i < count; i++)
1551 tc_drop_resource_reference(p->slot[i].buffer);
1552
1553 return p->base.num_slots;
1554 }
1555
1556 static void
1557 tc_set_shader_buffers(struct pipe_context *_pipe,
1558 enum pipe_shader_type shader,
1559 unsigned start, unsigned count,
1560 const struct pipe_shader_buffer *buffers,
1561 unsigned writable_bitmask)
1562 {
1563 if (!count)
1564 return;
1565
1566 struct threaded_context *tc = threaded_context(_pipe);
1567 struct tc_shader_buffers *p =
1568 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1569 buffers ? count : 0);
1570
1571 p->shader = shader;
1572 p->start = start;
1573 p->count = count;
1574 p->unbind = buffers == NULL;
1575 p->writable_bitmask = writable_bitmask;
1576
1577 if (buffers) {
1578 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1579
1580 for (unsigned i = 0; i < count; i++) {
1581 struct pipe_shader_buffer *dst = &p->slot[i];
1582 const struct pipe_shader_buffer *src = buffers + i;
1583
1584 tc_set_resource_reference(&dst->buffer, src->buffer);
1585 dst->buffer_offset = src->buffer_offset;
1586 dst->buffer_size = src->buffer_size;
1587
1588 if (src->buffer) {
1589 struct threaded_resource *tres = threaded_resource(src->buffer);
1590
1591 tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
1592
1593 if (writable_bitmask & BITFIELD_BIT(i)) {
1594 util_range_add(&tres->b, &tres->valid_buffer_range,
1595 src->buffer_offset,
1596 src->buffer_offset + src->buffer_size);
1597 }
1598 } else {
1599 tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1600 }
1601 }
1602 tc->seen_shader_buffers[shader] = true;
1603 } else {
1604 tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1605 }
1606
1607 tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1608 tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1609 }
1610
1611 struct tc_vertex_buffers {
1612 struct tc_call_base base;
1613 ubyte start, count;
1614 ubyte unbind_num_trailing_slots;
1615 struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1616 };
1617
1618 static uint16_t
1619 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1620 {
1621 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1622 unsigned count = p->count;
1623
1624 if (!count) {
1625 pipe->set_vertex_buffers(pipe, p->start, 0,
1626 p->unbind_num_trailing_slots, false, NULL);
1627 return call_size(tc_vertex_buffers);
1628 }
1629
1630 for (unsigned i = 0; i < count; i++)
1631 tc_assert(!p->slot[i].is_user_buffer);
1632
1633 pipe->set_vertex_buffers(pipe, p->start, count,
1634 p->unbind_num_trailing_slots, true, p->slot);
1635 return p->base.num_slots;
1636 }
1637
1638 static void
1639 tc_set_vertex_buffers(struct pipe_context *_pipe,
1640 unsigned start, unsigned count,
1641 unsigned unbind_num_trailing_slots,
1642 bool take_ownership,
1643 const struct pipe_vertex_buffer *buffers)
1644 {
1645 struct threaded_context *tc = threaded_context(_pipe);
1646
1647 if (!count && !unbind_num_trailing_slots)
1648 return;
1649
1650 if (count && buffers) {
1651 struct tc_vertex_buffers *p =
1652 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1653 p->start = start;
1654 p->count = count;
1655 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1656
1657 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1658
1659 if (take_ownership) {
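         /* Take over the caller's references as-is; no new references are added. */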
1660 memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1661
1662 for (unsigned i = 0; i < count; i++) {
1663 struct pipe_resource *buf = buffers[i].buffer.resource;
1664
1665 if (buf) {
1666 tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1667 } else {
1668 tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1669 }
1670 }
1671 } else {
1672 for (unsigned i = 0; i < count; i++) {
1673 struct pipe_vertex_buffer *dst = &p->slot[i];
1674 const struct pipe_vertex_buffer *src = buffers + i;
1675 struct pipe_resource *buf = src->buffer.resource;
1676
1677 tc_assert(!src->is_user_buffer);
1678 dst->stride = src->stride;
1679 dst->is_user_buffer = false;
1680 tc_set_resource_reference(&dst->buffer.resource, buf);
1681 dst->buffer_offset = src->buffer_offset;
1682
1683 if (buf) {
1684 tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1685 } else {
1686 tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1687 }
1688 }
1689 }
1690
1691 tc_unbind_buffers(&tc->vertex_buffers[start + count],
1692 unbind_num_trailing_slots);
1693 } else {
1694 struct tc_vertex_buffers *p =
1695 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1696 p->start = start;
1697 p->count = 0;
1698 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1699
1700 tc_unbind_buffers(&tc->vertex_buffers[start],
1701 count + unbind_num_trailing_slots);
1702 }
1703 }
1704
1705 struct tc_stream_outputs {
1706 struct tc_call_base base;
1707 unsigned count;
1708 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1709 unsigned offsets[PIPE_MAX_SO_BUFFERS];
1710 };
1711
1712 static uint16_t
1713 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1714 {
1715 struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1716 unsigned count = p->count;
1717
1718 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1719 for (unsigned i = 0; i < count; i++)
1720 tc_drop_so_target_reference(p->targets[i]);
1721
1722 return call_size(tc_stream_outputs);
1723 }
1724
1725 static void
1726 tc_set_stream_output_targets(struct pipe_context *_pipe,
1727 unsigned count,
1728 struct pipe_stream_output_target **tgs,
1729 const unsigned *offsets)
1730 {
1731 struct threaded_context *tc = threaded_context(_pipe);
1732 struct tc_stream_outputs *p =
1733 tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1734 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1735
1736 for (unsigned i = 0; i < count; i++) {
1737 p->targets[i] = NULL;
1738 pipe_so_target_reference(&p->targets[i], tgs[i]);
1739 if (tgs[i]) {
1740 tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
1741 } else {
1742 tc_unbind_buffer(&tc->streamout_buffers[i]);
1743 }
1744 }
1745 p->count = count;
1746 memcpy(p->offsets, offsets, count * sizeof(unsigned));
1747
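   /* Unbind any previously bound targets beyond the new count. */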
1748 tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1749 if (count)
1750 tc->seen_streamout_buffers = true;
1751 }
1752
1753 static void
1754 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1755 unsigned count, struct pipe_surface **resources)
1756 {
1757 struct threaded_context *tc = threaded_context(_pipe);
1758 struct pipe_context *pipe = tc->pipe;
1759
1760 tc_sync(tc);
1761 pipe->set_compute_resources(pipe, start, count, resources);
1762 }
1763
1764 static void
1765 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1766 unsigned count, struct pipe_resource **resources,
1767 uint32_t **handles)
1768 {
1769 struct threaded_context *tc = threaded_context(_pipe);
1770 struct pipe_context *pipe = tc->pipe;
1771
1772 tc_sync(tc);
1773 pipe->set_global_binding(pipe, first, count, resources, handles);
1774 }
1775
1776
1777 /********************************************************************
1778 * views
1779 */
1780
1781 static struct pipe_surface *
1782 tc_create_surface(struct pipe_context *_pipe,
1783 struct pipe_resource *resource,
1784 const struct pipe_surface *surf_tmpl)
1785 {
1786 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1787 struct pipe_surface *view =
1788 pipe->create_surface(pipe, resource, surf_tmpl);
1789
1790 if (view)
1791 view->context = _pipe;
1792 return view;
1793 }
1794
1795 static void
1796 tc_surface_destroy(struct pipe_context *_pipe,
1797 struct pipe_surface *surf)
1798 {
1799 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1800
1801 pipe->surface_destroy(pipe, surf);
1802 }
1803
1804 static struct pipe_sampler_view *
1805 tc_create_sampler_view(struct pipe_context *_pipe,
1806 struct pipe_resource *resource,
1807 const struct pipe_sampler_view *templ)
1808 {
1809 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1810 struct pipe_sampler_view *view =
1811 pipe->create_sampler_view(pipe, resource, templ);
1812
1813 if (view)
1814 view->context = _pipe;
1815 return view;
1816 }
1817
1818 static void
1819 tc_sampler_view_destroy(struct pipe_context *_pipe,
1820 struct pipe_sampler_view *view)
1821 {
1822 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1823
1824 pipe->sampler_view_destroy(pipe, view);
1825 }
1826
1827 static struct pipe_stream_output_target *
1828 tc_create_stream_output_target(struct pipe_context *_pipe,
1829 struct pipe_resource *res,
1830 unsigned buffer_offset,
1831 unsigned buffer_size)
1832 {
1833 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1834 struct threaded_resource *tres = threaded_resource(res);
1835 struct pipe_stream_output_target *view;
1836
1837 util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1838 buffer_offset + buffer_size);
1839
1840 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1841 buffer_size);
1842 if (view)
1843 view->context = _pipe;
1844 return view;
1845 }
1846
1847 static void
1848 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1849 struct pipe_stream_output_target *target)
1850 {
1851 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1852
1853 pipe->stream_output_target_destroy(pipe, target);
1854 }
1855
1856
1857 /********************************************************************
1858 * bindless
1859 */
1860
1861 static uint64_t
1862 tc_create_texture_handle(struct pipe_context *_pipe,
1863 struct pipe_sampler_view *view,
1864 const struct pipe_sampler_state *state)
1865 {
1866 struct threaded_context *tc = threaded_context(_pipe);
1867 struct pipe_context *pipe = tc->pipe;
1868
1869 tc_sync(tc);
1870 return pipe->create_texture_handle(pipe, view, state);
1871 }
1872
1873 struct tc_make_texture_handle_resident {
1874 struct tc_call_base base;
1875 bool resident;
1876 uint64_t handle;
1877 };
1878
1879 static uint16_t
1880 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1881 {
1882 struct tc_make_texture_handle_resident *p =
1883 to_call(call, tc_make_texture_handle_resident);
1884
1885 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1886 return call_size(tc_make_texture_handle_resident);
1887 }
1888
1889 static void
1890 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1891 bool resident)
1892 {
1893 struct threaded_context *tc = threaded_context(_pipe);
1894 struct tc_make_texture_handle_resident *p =
1895 tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1896 tc_make_texture_handle_resident);
1897
1898 p->handle = handle;
1899 p->resident = resident;
1900 }
1901
1902 static uint64_t
1903 tc_create_image_handle(struct pipe_context *_pipe,
1904 const struct pipe_image_view *image)
1905 {
1906 struct threaded_context *tc = threaded_context(_pipe);
1907 struct pipe_context *pipe = tc->pipe;
1908
1909 tc_sync(tc);
1910 return pipe->create_image_handle(pipe, image);
1911 }
1912
1913 struct tc_make_image_handle_resident {
1914 struct tc_call_base base;
1915 bool resident;
1916 unsigned access;
1917 uint64_t handle;
1918 };
1919
1920 static uint16_t
1921 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1922 {
1923 struct tc_make_image_handle_resident *p =
1924 to_call(call, tc_make_image_handle_resident);
1925
1926 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1927 return call_size(tc_make_image_handle_resident);
1928 }
1929
1930 static void
1931 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1932 unsigned access, bool resident)
1933 {
1934 struct threaded_context *tc = threaded_context(_pipe);
1935 struct tc_make_image_handle_resident *p =
1936 tc_add_call(tc, TC_CALL_make_image_handle_resident,
1937 tc_make_image_handle_resident);
1938
1939 p->handle = handle;
1940 p->access = access;
1941 p->resident = resident;
1942 }
1943
1944
1945 /********************************************************************
1946 * transfer
1947 */
1948
1949 struct tc_replace_buffer_storage {
1950 struct tc_call_base base;
1951 uint16_t num_rebinds;
1952 uint32_t rebind_mask;
1953 uint32_t delete_buffer_id;
1954 struct pipe_resource *dst;
1955 struct pipe_resource *src;
1956 tc_replace_buffer_storage_func func;
1957 };
1958
1959 static uint16_t
1960 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
1961 {
1962 struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
1963
1964 p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
1965
1966 tc_drop_resource_reference(p->dst);
1967 tc_drop_resource_reference(p->src);
1968 return call_size(tc_replace_buffer_storage);
1969 }
1970
1971 /* Return true if the buffer has been invalidated or is idle. */
1972 static bool
1973 tc_invalidate_buffer(struct threaded_context *tc,
1974 struct threaded_resource *tbuf)
1975 {
1976 if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
1977 /* It's idle, so invalidation would be a no-op; we still clear the valid
1978 * range because this is conceptually an invalidation whose useless
1979 * part we simply skip.
1980 *
1981 * If the buffer is bound for write, we can't clear the valid range.
1982 */
1983 if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
1984 util_range_set_empty(&tbuf->valid_buffer_range);
1985 return true;
1986 }
1987
1988 struct pipe_screen *screen = tc->base.screen;
1989 struct pipe_resource *new_buf;
1990
1991 /* Shared, pinned, and sparse buffers can't be reallocated. */
1992 if (tbuf->is_shared ||
1993 tbuf->is_user_ptr ||
1994 tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1995 return false;
1996
1997 /* Allocate a new one. */
1998 new_buf = screen->resource_create(screen, &tbuf->b);
1999 if (!new_buf)
2000 return false;
2001
2002 /* Replace the "latest" pointer. */
2003 if (tbuf->latest != &tbuf->b)
2004 pipe_resource_reference(&tbuf->latest, NULL);
2005
2006 tbuf->latest = new_buf;
2007
2008 uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2009
2010 /* Enqueue storage replacement of the original buffer. */
2011 struct tc_replace_buffer_storage *p =
2012 tc_add_call(tc, TC_CALL_replace_buffer_storage,
2013 tc_replace_buffer_storage);
2014
2015 p->func = tc->replace_buffer_storage;
2016 tc_set_resource_reference(&p->dst, &tbuf->b);
2017 tc_set_resource_reference(&p->src, new_buf);
2018 p->delete_buffer_id = delete_buffer_id;
2019 p->rebind_mask = 0;
2020
2021 /* Treat the current buffer as the new buffer. */
2022 bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2023 p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2024 threaded_resource(new_buf)->buffer_id_unique,
2025 &p->rebind_mask);
2026
2027 /* If the buffer is not bound for write, clear the valid range. */
2028 if (!bound_for_write)
2029 util_range_set_empty(&tbuf->valid_buffer_range);
2030
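   /* The original resource now identifies the new storage; the old unique id
    * was queued above so the driver can delete it. */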
2031 tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2032 threaded_resource(new_buf)->buffer_id_unique = 0;
2033
2034 return true;
2035 }
2036
2037 static unsigned
2038 tc_improve_map_buffer_flags(struct threaded_context *tc,
2039 struct threaded_resource *tres, unsigned usage,
2040 unsigned offset, unsigned size)
2041 {
2042 /* Never invalidate inside the driver and never infer "unsynchronized". */
2043 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2044 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2045
2046 /* Prevent a reentry. */
2047 if (usage & tc_flags)
2048 return usage;
2049
2050 /* Use the staging upload if it's preferred. */
2051 if (usage & (PIPE_MAP_DISCARD_RANGE |
2052 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2053 !(usage & PIPE_MAP_PERSISTENT) &&
2054 tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2055 tc->use_forced_staging_uploads) {
2056 usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2057 PIPE_MAP_UNSYNCHRONIZED);
2058
2059 return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2060 }
2061
2062 /* Sparse buffers can't be mapped directly and can't be reallocated
2063 * (fully invalidated). That may just be a radeonsi limitation, but
2064 * the threaded context must obey it with radeonsi.
2065 */
2066 if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
2067 /* We can use DISCARD_RANGE instead of full discard. This is the only
2068 * fast path for sparse buffers that doesn't need thread synchronization.
2069 */
2070 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2071 usage |= PIPE_MAP_DISCARD_RANGE;
2072
2073 /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2074 * The threaded context doesn't do unsynchronized mappings and
2075 * invalidations of sparse buffers, so correct driver behavior won't
2076 * result in incorrect behavior with the threaded context.
2077 */
2078 return usage;
2079 }
2080
2081 usage |= tc_flags;
2082
2083 /* Handle CPU reads trivially. */
2084 if (usage & PIPE_MAP_READ) {
2085 if (usage & PIPE_MAP_UNSYNCHRONIZED)
2086 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2087
2088 /* Drivers aren't allowed to do buffer invalidations. */
2089 return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2090 }
2091
2092 /* See if the buffer range being mapped has never been initialized or
2093 * the buffer is idle, in which case it can be mapped unsynchronized. */
2094 if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2095 ((!tres->is_shared &&
2096 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2097 !tc_is_buffer_busy(tc, tres, usage)))
2098 usage |= PIPE_MAP_UNSYNCHRONIZED;
2099
2100 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2101 /* If discarding the entire range, discard the whole resource instead. */
2102 if (usage & PIPE_MAP_DISCARD_RANGE &&
2103 offset == 0 && size == tres->b.width0)
2104 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2105
2106 /* Discard the whole resource if needed. */
2107 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2108 if (tc_invalidate_buffer(tc, tres))
2109 usage |= PIPE_MAP_UNSYNCHRONIZED;
2110 else
2111 usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2112 }
2113 }
2114
2115 /* We won't need this flag anymore. */
2116 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2117 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2118
2119 /* GL_AMD_pinned_memory and persistent mappings can't use staging
2120 * buffers. */
2121 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2122 PIPE_MAP_PERSISTENT) ||
2123 tres->is_user_ptr)
2124 usage &= ~PIPE_MAP_DISCARD_RANGE;
2125
2126 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2127 if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2128 usage &= ~PIPE_MAP_DISCARD_RANGE;
2129 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2130 }
2131
2132 return usage;
2133 }
2134
2135 static void *
2136 tc_buffer_map(struct pipe_context *_pipe,
2137 struct pipe_resource *resource, unsigned level,
2138 unsigned usage, const struct pipe_box *box,
2139 struct pipe_transfer **transfer)
2140 {
2141 struct threaded_context *tc = threaded_context(_pipe);
2142 struct threaded_resource *tres = threaded_resource(resource);
2143 struct pipe_context *pipe = tc->pipe;
2144
2145 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2146
2147 /* Do a staging transfer within the threaded context. The driver should
2148 * only get resource_copy_region.
2149 */
2150 if (usage & PIPE_MAP_DISCARD_RANGE) {
2151 struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
2152 uint8_t *map;
2153
2154 ttrans->staging = NULL;
2155
2156 u_upload_alloc(tc->base.stream_uploader, 0,
2157 box->width + (box->x % tc->map_buffer_alignment),
2158 tc->map_buffer_alignment, &ttrans->b.offset,
2159 &ttrans->staging, (void**)&map);
2160 if (!map) {
2161 slab_free(&tc->pool_transfers, ttrans);
2162 return NULL;
2163 }
2164
2165 ttrans->b.resource = resource;
2166 ttrans->b.level = 0;
2167 ttrans->b.usage = usage;
2168 ttrans->b.box = *box;
2169 ttrans->b.stride = 0;
2170 ttrans->b.layer_stride = 0;
2171 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2172 *transfer = &ttrans->b;
2173
2174 p_atomic_inc(&tres->pending_staging_uploads);
2175 util_range_add(resource, &tres->pending_staging_uploads_range,
2176 box->x, box->x + box->width);
2177
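      /* Return a pointer with the same misalignment as box->x so that
       * tc_buffer_do_flush_region copies from matching offsets. */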
2178 return map + (box->x % tc->map_buffer_alignment);
2179 }
2180
2181 if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2182 p_atomic_read(&tres->pending_staging_uploads) &&
2183 util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2184 /* Write conflict detected between a staging transfer and the direct mapping we're
2185 * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2186 * will have to wait for the staging transfer completion.
2187 * Note: The conflict detection is only based on the mapped range, not on the actual
2188 * written range(s).
2189 */
2190 usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2191 tc->use_forced_staging_uploads = false;
2192 }
2193
2194 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2195 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2196 tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
2197 usage & PIPE_MAP_READ ? " read" : " staging conflict");
2198 tc_set_driver_thread(tc);
2199 }
2200
2201 tc->bytes_mapped_estimate += box->width;
2202
2203 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2204 level, usage, box, transfer);
2205 threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2206
2207 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2208 tc_clear_driver_thread(tc);
2209
2210 return ret;
2211 }
2212
2213 static void *
2214 tc_texture_map(struct pipe_context *_pipe,
2215 struct pipe_resource *resource, unsigned level,
2216 unsigned usage, const struct pipe_box *box,
2217 struct pipe_transfer **transfer)
2218 {
2219 struct threaded_context *tc = threaded_context(_pipe);
2220 struct threaded_resource *tres = threaded_resource(resource);
2221 struct pipe_context *pipe = tc->pipe;
2222
2223 tc_sync_msg(tc, "texture");
2224 tc_set_driver_thread(tc);
2225
2226 tc->bytes_mapped_estimate += box->width;
2227
2228 void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2229 level, usage, box, transfer);
2230
2231 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2232 tc_clear_driver_thread(tc);
2233
2234 return ret;
2235 }
2236
2237 struct tc_transfer_flush_region {
2238 struct tc_call_base base;
2239 struct pipe_box box;
2240 struct pipe_transfer *transfer;
2241 };
2242
2243 static uint16_t
2244 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2245 {
2246 struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2247
2248 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2249 return call_size(tc_transfer_flush_region);
2250 }
2251
2252 struct tc_resource_copy_region {
2253 struct tc_call_base base;
2254 unsigned dst_level;
2255 unsigned dstx, dsty, dstz;
2256 unsigned src_level;
2257 struct pipe_box src_box;
2258 struct pipe_resource *dst;
2259 struct pipe_resource *src;
2260 };
2261
2262 static void
2263 tc_resource_copy_region(struct pipe_context *_pipe,
2264 struct pipe_resource *dst, unsigned dst_level,
2265 unsigned dstx, unsigned dsty, unsigned dstz,
2266 struct pipe_resource *src, unsigned src_level,
2267 const struct pipe_box *src_box);
2268
2269 static void
2270 tc_buffer_do_flush_region(struct threaded_context *tc,
2271 struct threaded_transfer *ttrans,
2272 const struct pipe_box *box)
2273 {
2274 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2275
2276 if (ttrans->staging) {
2277 struct pipe_box src_box;
2278
2279 u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2280 (box->x - ttrans->b.box.x),
2281 box->width, &src_box);
2282
2283 /* Copy the staging buffer into the original one. */
2284 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2285 ttrans->staging, 0, &src_box);
2286 }
2287
2288 util_range_add(&tres->b, ttrans->valid_buffer_range,
2289 box->x, box->x + box->width);
2290 }
2291
2292 static void
2293 tc_transfer_flush_region(struct pipe_context *_pipe,
2294 struct pipe_transfer *transfer,
2295 const struct pipe_box *rel_box)
2296 {
2297 struct threaded_context *tc = threaded_context(_pipe);
2298 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2299 struct threaded_resource *tres = threaded_resource(transfer->resource);
2300 unsigned required_usage = PIPE_MAP_WRITE |
2301 PIPE_MAP_FLUSH_EXPLICIT;
2302
2303 if (tres->b.target == PIPE_BUFFER) {
2304 if ((transfer->usage & required_usage) == required_usage) {
2305 struct pipe_box box;
2306
2307 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2308 tc_buffer_do_flush_region(tc, ttrans, &box);
2309 }
2310
2311 /* Staging transfers don't send the call to the driver. */
2312 if (ttrans->staging)
2313 return;
2314 }
2315
2316 struct tc_transfer_flush_region *p =
2317 tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2318 p->transfer = transfer;
2319 p->box = *rel_box;
2320 }
2321
2322 static void
2323 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2324 unsigned flags);
2325
2326 struct tc_buffer_unmap {
2327 struct tc_call_base base;
2328 bool was_staging_transfer;
2329 union {
2330 struct pipe_transfer *transfer;
2331 struct pipe_resource *resource;
2332 };
2333 };
2334
2335 static uint16_t
2336 tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2337 {
2338 struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2339
2340 if (p->was_staging_transfer) {
2341 struct threaded_resource *tres = threaded_resource(p->resource);
2342 /* Nothing to do except keeping track of staging uploads */
2343 assert(tres->pending_staging_uploads > 0);
2344 p_atomic_dec(&tres->pending_staging_uploads);
2345 tc_drop_resource_reference(p->resource);
2346 } else {
2347 pipe->buffer_unmap(pipe, p->transfer);
2348 }
2349
2350 return call_size(tc_buffer_unmap);
2351 }
2352
2353 static void
2354 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2355 {
2356 struct threaded_context *tc = threaded_context(_pipe);
2357 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2358 struct threaded_resource *tres = threaded_resource(transfer->resource);
2359
2360 /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2361 * called from any thread and bypasses all multithreaded queues.
2362 */
2363 if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2364 assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2365 assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2366 PIPE_MAP_DISCARD_RANGE)));
2367
2368 struct pipe_context *pipe = tc->pipe;
2369 util_range_add(&tres->b, ttrans->valid_buffer_range,
2370 transfer->box.x, transfer->box.x + transfer->box.width);
2371
2372 pipe->buffer_unmap(pipe, transfer);
2373 return;
2374 }
2375
2376 bool was_staging_transfer = false;
2377
2378 if (transfer->usage & PIPE_MAP_WRITE &&
2379 !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2380 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2381
2382 if (ttrans->staging) {
2383 was_staging_transfer = true;
2384
2385 tc_drop_resource_reference(ttrans->staging);
2386 slab_free(&tc->pool_transfers, ttrans);
2387 }
2388
2389 struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2390 tc_buffer_unmap);
2391 if (was_staging_transfer) {
2392 tc_set_resource_reference(&p->resource, &tres->b);
2393 p->was_staging_transfer = true;
2394 } else {
2395 p->transfer = transfer;
2396 p->was_staging_transfer = false;
2397 }
2398
2399 /* tc_buffer_map maps buffers directly, but tc_buffer_unmap defers the
2400 * unmap operation to batch execution.
2401 * bytes_mapped_estimate estimates the map/unmap byte delta; if it
2402 * exceeds the optional limit, the current batch is flushed to reclaim
2403 * some RAM. */
2404 if (!ttrans->staging && tc->bytes_mapped_limit &&
2405 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2406 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2407 }
2408 }
2409
2410 struct tc_texture_unmap {
2411 struct tc_call_base base;
2412 struct pipe_transfer *transfer;
2413 };
2414
2415 static uint16_t
2416 tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2417 {
2418 struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2419
2420 pipe->texture_unmap(pipe, p->transfer);
2421 return call_size(tc_texture_unmap);
2422 }
2423
2424 static void
2425 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2426 {
2427 struct threaded_context *tc = threaded_context(_pipe);
2428 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2429
2430 tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2431
2432 /* tc_texture_map maps textures directly, but tc_texture_unmap defers
2433 * the unmap operation to batch execution.
2434 * bytes_mapped_estimate estimates the map/unmap byte delta; if it
2435 * exceeds the optional limit, the current batch is flushed to reclaim
2436 * some RAM. */
2437 if (!ttrans->staging && tc->bytes_mapped_limit &&
2438 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2439 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2440 }
2441 }
2442
2443 struct tc_buffer_subdata {
2444 struct tc_call_base base;
2445 unsigned usage, offset, size;
2446 struct pipe_resource *resource;
2447 char slot[0]; /* more will be allocated if needed */
2448 };
2449
2450 static uint16_t
2451 tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2452 {
2453 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2454
2455 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2456 p->slot);
2457 tc_drop_resource_reference(p->resource);
2458 return p->base.num_slots;
2459 }
2460
2461 static void
2462 tc_buffer_subdata(struct pipe_context *_pipe,
2463 struct pipe_resource *resource,
2464 unsigned usage, unsigned offset,
2465 unsigned size, const void *data)
2466 {
2467 struct threaded_context *tc = threaded_context(_pipe);
2468 struct threaded_resource *tres = threaded_resource(resource);
2469
2470 if (!size)
2471 return;
2472
2473 usage |= PIPE_MAP_WRITE;
2474
2475 /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2476 if (!(usage & PIPE_MAP_DIRECTLY))
2477 usage |= PIPE_MAP_DISCARD_RANGE;
2478
2479 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2480
2481 /* Unsynchronized and big transfers should use transfer_map. Also handle
2482 * full invalidations, because drivers aren't allowed to do them.
2483 */
2484 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2485 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2486 size > TC_MAX_SUBDATA_BYTES) {
2487 struct pipe_transfer *transfer;
2488 struct pipe_box box;
2489 uint8_t *map = NULL;
2490
2491 u_box_1d(offset, size, &box);
2492
2493 map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2494 if (map) {
2495 memcpy(map, data, size);
2496 tc_buffer_unmap(_pipe, transfer);
2497 }
2498 return;
2499 }
2500
2501 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2502
2503 /* The upload is small. Enqueue it. */
2504 struct tc_buffer_subdata *p =
2505 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2506
2507 tc_set_resource_reference(&p->resource, resource);
2508 /* This will always be busy, because if it weren't, tc_improve_map_buffer_flags
2509 * would have set UNSYNCHRONIZED and we wouldn't get here.
2510 */
2511 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
2512 p->usage = usage;
2513 p->offset = offset;
2514 p->size = size;
2515 memcpy(p->slot, data, size);
2516 }
2517
2518 struct tc_texture_subdata {
2519 struct tc_call_base base;
2520 unsigned level, usage, stride, layer_stride;
2521 struct pipe_box box;
2522 struct pipe_resource *resource;
2523 char slot[0]; /* more will be allocated if needed */
2524 };
2525
2526 static uint16_t
2527 tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2528 {
2529 struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2530
2531 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2532 p->slot, p->stride, p->layer_stride);
2533 tc_drop_resource_reference(p->resource);
2534 return p->base.num_slots;
2535 }
2536
2537 static void
2538 tc_texture_subdata(struct pipe_context *_pipe,
2539 struct pipe_resource *resource,
2540 unsigned level, unsigned usage,
2541 const struct pipe_box *box,
2542 const void *data, unsigned stride,
2543 unsigned layer_stride)
2544 {
2545 struct threaded_context *tc = threaded_context(_pipe);
2546 unsigned size;
2547
2548 assert(box->height >= 1);
2549 assert(box->depth >= 1);
2550
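   /* Byte size of the described box: full layers, plus full rows of the last
    * layer, plus the last row. */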
2551 size = (box->depth - 1) * layer_stride +
2552 (box->height - 1) * stride +
2553 box->width * util_format_get_blocksize(resource->format);
2554 if (!size)
2555 return;
2556
2557 /* Small uploads can be enqueued, big uploads must sync. */
2558 if (size <= TC_MAX_SUBDATA_BYTES) {
2559 struct tc_texture_subdata *p =
2560 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2561
2562 tc_set_resource_reference(&p->resource, resource);
2563 p->level = level;
2564 p->usage = usage;
2565 p->box = *box;
2566 p->stride = stride;
2567 p->layer_stride = layer_stride;
2568 memcpy(p->slot, data, size);
2569 } else {
2570 struct pipe_context *pipe = tc->pipe;
2571
2572 tc_sync(tc);
2573 tc_set_driver_thread(tc);
2574 pipe->texture_subdata(pipe, resource, level, usage, box, data,
2575 stride, layer_stride);
2576 tc_clear_driver_thread(tc);
2577 }
2578 }
2579
2580
2581 /********************************************************************
2582 * miscellaneous
2583 */
2584
2585 #define TC_FUNC_SYNC_RET0(ret_type, func) \
2586 static ret_type \
2587 tc_##func(struct pipe_context *_pipe) \
2588 { \
2589 struct threaded_context *tc = threaded_context(_pipe); \
2590 struct pipe_context *pipe = tc->pipe; \
2591 tc_sync(tc); \
2592 return pipe->func(pipe); \
2593 }
2594
2595 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2596
2597 static void
2598 tc_get_sample_position(struct pipe_context *_pipe,
2599 unsigned sample_count, unsigned sample_index,
2600 float *out_value)
2601 {
2602 struct threaded_context *tc = threaded_context(_pipe);
2603 struct pipe_context *pipe = tc->pipe;
2604
2605 tc_sync(tc);
2606 pipe->get_sample_position(pipe, sample_count, sample_index,
2607 out_value);
2608 }
2609
2610 static enum pipe_reset_status
2611 tc_get_device_reset_status(struct pipe_context *_pipe)
2612 {
2613 struct threaded_context *tc = threaded_context(_pipe);
2614 struct pipe_context *pipe = tc->pipe;
2615
2616 if (!tc->options.unsynchronized_get_device_reset_status)
2617 tc_sync(tc);
2618
2619 return pipe->get_device_reset_status(pipe);
2620 }
2621
2622 static void
2623 tc_set_device_reset_callback(struct pipe_context *_pipe,
2624 const struct pipe_device_reset_callback *cb)
2625 {
2626 struct threaded_context *tc = threaded_context(_pipe);
2627 struct pipe_context *pipe = tc->pipe;
2628
2629 tc_sync(tc);
2630 pipe->set_device_reset_callback(pipe, cb);
2631 }
2632
2633 struct tc_string_marker {
2634 struct tc_call_base base;
2635 int len;
2636 char slot[0]; /* more will be allocated if needed */
2637 };
2638
2639 static uint16_t
2640 tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2641 {
2642 struct tc_string_marker *p = (struct tc_string_marker *)call;
2643 pipe->emit_string_marker(pipe, p->slot, p->len);
2644 return p->base.num_slots;
2645 }
2646
2647 static void
2648 tc_emit_string_marker(struct pipe_context *_pipe,
2649 const char *string, int len)
2650 {
2651 struct threaded_context *tc = threaded_context(_pipe);
2652
2653 if (len <= TC_MAX_STRING_MARKER_BYTES) {
2654 struct tc_string_marker *p =
2655 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2656
2657 memcpy(p->slot, string, len);
2658 p->len = len;
2659 } else {
2660 struct pipe_context *pipe = tc->pipe;
2661
2662 tc_sync(tc);
2663 tc_set_driver_thread(tc);
2664 pipe->emit_string_marker(pipe, string, len);
2665 tc_clear_driver_thread(tc);
2666 }
2667 }
2668
2669 static void
2670 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2671 unsigned flags)
2672 {
2673 struct threaded_context *tc = threaded_context(_pipe);
2674 struct pipe_context *pipe = tc->pipe;
2675
2676 tc_sync(tc);
2677 pipe->dump_debug_state(pipe, stream, flags);
2678 }
2679
2680 static void
2681 tc_set_debug_callback(struct pipe_context *_pipe,
2682 const struct pipe_debug_callback *cb)
2683 {
2684 struct threaded_context *tc = threaded_context(_pipe);
2685 struct pipe_context *pipe = tc->pipe;
2686
2687 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2688 * with this. shader-db will use an environment variable to disable
2689 * the threaded context.
2690 */
2691 if (cb && cb->debug_message && !cb->async)
2692 return;
2693
2694 tc_sync(tc);
2695 pipe->set_debug_callback(pipe, cb);
2696 }
2697
2698 static void
2699 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2700 {
2701 struct threaded_context *tc = threaded_context(_pipe);
2702 struct pipe_context *pipe = tc->pipe;
2703
2704 tc_sync(tc);
2705 pipe->set_log_context(pipe, log);
2706 }
2707
2708 static void
2709 tc_create_fence_fd(struct pipe_context *_pipe,
2710 struct pipe_fence_handle **fence, int fd,
2711 enum pipe_fd_type type)
2712 {
2713 struct threaded_context *tc = threaded_context(_pipe);
2714 struct pipe_context *pipe = tc->pipe;
2715
2716 tc_sync(tc);
2717 pipe->create_fence_fd(pipe, fence, fd, type);
2718 }
2719
2720 struct tc_fence_call {
2721 struct tc_call_base base;
2722 struct pipe_fence_handle *fence;
2723 };
2724
2725 static uint16_t
2726 tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2727 {
2728 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2729
2730 pipe->fence_server_sync(pipe, fence);
2731 pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2732 return call_size(tc_fence_call);
2733 }
2734
2735 static void
2736 tc_fence_server_sync(struct pipe_context *_pipe,
2737 struct pipe_fence_handle *fence)
2738 {
2739 struct threaded_context *tc = threaded_context(_pipe);
2740 struct pipe_screen *screen = tc->pipe->screen;
2741 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2742 tc_fence_call);
2743
2744 call->fence = NULL;
2745 screen->fence_reference(screen, &call->fence, fence);
2746 }
2747
2748 static uint16_t
2749 tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last)
2750 {
2751 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2752
2753 pipe->fence_server_signal(pipe, fence);
2754 pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2755 return call_size(tc_fence_call);
2756 }
2757
2758 static void
2759 tc_fence_server_signal(struct pipe_context *_pipe,
2760 struct pipe_fence_handle *fence)
2761 {
2762 struct threaded_context *tc = threaded_context(_pipe);
2763 struct pipe_screen *screen = tc->pipe->screen;
2764 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal,
2765 tc_fence_call);
2766
2767 call->fence = NULL;
2768 screen->fence_reference(screen, &call->fence, fence);
2769 }
2770
2771 static struct pipe_video_codec *
2772 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2773 UNUSED const struct pipe_video_codec *templ)
2774 {
2775 unreachable("Threaded context should not be enabled for video APIs");
2776 return NULL;
2777 }
2778
2779 static struct pipe_video_buffer *
2780 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2781 UNUSED const struct pipe_video_buffer *templ)
2782 {
2783 unreachable("Threaded context should not be enabled for video APIs");
2784 return NULL;
2785 }
2786
2787 struct tc_context_param {
2788 struct tc_call_base base;
2789 enum pipe_context_param param;
2790 unsigned value;
2791 };
2792
2793 static uint16_t
2794 tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2795 {
2796 struct tc_context_param *p = to_call(call, tc_context_param);
2797
2798 if (pipe->set_context_param)
2799 pipe->set_context_param(pipe, p->param, p->value);
2800
2801 return call_size(tc_context_param);
2802 }
2803
2804 static void
2805 tc_set_context_param(struct pipe_context *_pipe,
2806 enum pipe_context_param param,
2807 unsigned value)
2808 {
2809 struct threaded_context *tc = threaded_context(_pipe);
2810
2811 if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2812 /* Pin the gallium thread as requested. */
2813 util_set_thread_affinity(tc->queue.threads[0],
2814 util_get_cpu_caps()->L3_affinity_mask[value],
2815 NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2816
2817 /* Execute this immediately (without enqueuing).
2818 * It's required to be thread-safe.
2819 */
2820 struct pipe_context *pipe = tc->pipe;
2821 if (pipe->set_context_param)
2822 pipe->set_context_param(pipe, param, value);
2823 return;
2824 }
2825
2826 if (tc->pipe->set_context_param) {
2827 struct tc_context_param *call =
2828 tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2829
2830 call->param = param;
2831 call->value = value;
2832 }
2833 }
2834
2835
2836 /********************************************************************
2837 * draw, launch, clear, blit, copy, flush
2838 */
2839
2840 struct tc_flush_call {
2841 struct tc_call_base base;
2842 unsigned flags;
2843 struct threaded_context *tc;
2844 struct pipe_fence_handle *fence;
2845 };
2846
2847 static void
2848 tc_flush_queries(struct threaded_context *tc)
2849 {
2850 struct threaded_query *tq, *tmp;
2851 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
2852 list_del(&tq->head_unflushed);
2853
2854 /* Memory release semantics: due to a possible race with
2855 * tc_get_query_result, we must ensure that the linked list changes
2856 * are visible before setting tq->flushed.
2857 */
2858 p_atomic_set(&tq->flushed, true);
2859 }
2860 }
2861
2862 static uint16_t
2863 tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
2864 {
2865 struct tc_flush_call *p = to_call(call, tc_flush_call);
2866 struct pipe_screen *screen = pipe->screen;
2867
2868 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
2869 screen->fence_reference(screen, &p->fence, NULL);
2870
2871 if (!(p->flags & PIPE_FLUSH_DEFERRED))
2872 tc_flush_queries(p->tc);
2873
2874 return call_size(tc_flush_call);
2875 }
2876
2877 static void
2878 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2879 unsigned flags)
2880 {
2881 struct threaded_context *tc = threaded_context(_pipe);
2882 struct pipe_context *pipe = tc->pipe;
2883 struct pipe_screen *screen = pipe->screen;
2884 bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
2885
2886 if (async && tc->options.create_fence) {
2887 if (fence) {
2888 struct tc_batch *next = &tc->batch_slots[tc->next];
2889
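      /* Lazily create a token for this unflushed batch; the fence created
       * below holds it so the batch can still be identified and flushed on
       * demand before it executes. */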
2890 if (!next->token) {
2891 next->token = malloc(sizeof(*next->token));
2892 if (!next->token)
2893 goto out_of_memory;
2894
2895 pipe_reference_init(&next->token->ref, 1);
2896 next->token->tc = tc;
2897 }
2898
2899 screen->fence_reference(screen, fence,
2900 tc->options.create_fence(pipe, next->token));
2901 if (!*fence)
2902 goto out_of_memory;
2903 }
2904
2905 struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
2906 p->tc = tc;
2907 p->fence = fence ? *fence : NULL;
2908 p->flags = flags | TC_FLUSH_ASYNC;
2909
2910 if (!(flags & PIPE_FLUSH_DEFERRED))
2911 tc_batch_flush(tc);
2912 return;
2913 }
2914
2915 out_of_memory:
2916 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
2917 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
2918
2919 if (!(flags & PIPE_FLUSH_DEFERRED))
2920 tc_flush_queries(tc);
2921 tc_set_driver_thread(tc);
2922 pipe->flush(pipe, fence, flags);
2923 tc_clear_driver_thread(tc);
2924 }
2925
2926 struct tc_draw_single {
2927 struct tc_call_base base;
2928 unsigned index_bias;
2929 struct pipe_draw_info info;
2930 };
2931
2932 struct tc_draw_single_drawid {
2933 struct tc_draw_single base;
2934 unsigned drawid_offset;
2935 };
2936
2937 static uint16_t
2938 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
2939 {
2940 struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
2941 struct tc_draw_single *info = &info_drawid->base;
2942
2943 /* u_threaded_context stores start/count in min/max_index for single draws. */
2944 /* Drivers using u_threaded_context shouldn't use min/max_index. */
2945 struct pipe_draw_start_count_bias draw;
2946
2947 draw.start = info->info.min_index;
2948 draw.count = info->info.max_index;
2949 draw.index_bias = info->index_bias;
2950
2951 info->info.index_bounds_valid = false;
2952 info->info.has_user_indices = false;
2953 info->info.take_index_buffer_ownership = false;
2954
2955 pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
2956 if (info->info.index_size)
2957 tc_drop_resource_reference(info->info.index.resource);
2958
2959 return call_size(tc_draw_single_drawid);
2960 }
2961
2962 static void
2963 simplify_draw_info(struct pipe_draw_info *info)
2964 {
2965 /* Clear these fields to facilitate draw merging.
2966 * Drivers shouldn't use them.
2967 */
2968 info->has_user_indices = false;
2969 info->index_bounds_valid = false;
2970 info->take_index_buffer_ownership = false;
2971 info->index_bias_varies = false;
2972 info->_pad = 0;
2973
2974 /* This shouldn't be set when merging single draws. */
2975 info->increment_draw_id = false;
2976
2977 if (info->index_size) {
2978 if (!info->primitive_restart)
2979 info->restart_index = 0;
2980 } else {
2981 assert(!info->primitive_restart);
2982 info->primitive_restart = false;
2983 info->restart_index = 0;
2984 info->index.resource = NULL;
2985 }
2986 }
2987
2988 static bool
2989 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
2990 struct tc_draw_single *next)
2991 {
2992 if (next->base.call_id != TC_CALL_draw_single)
2993 return false;
2994
2995 simplify_draw_info(&next->info);
2996
2997 STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
2998 sizeof(struct pipe_draw_info) - 8);
2999 STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3000 sizeof(struct pipe_draw_info) - 4);
3001 /* All fields must be the same except start and count. */
3002 /* u_threaded_context stores start/count in min/max_index for single draws. */
3003 return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3004 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3005 }
3006
3007 static uint16_t
3008 tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3009 {
3010 /* Draw call merging. */
3011 struct tc_draw_single *first = to_call(call, tc_draw_single);
3012 struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3013 struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3014
3015 /* If at least 2 consecutive draw calls can be merged... */
3016 if (next != last &&
3017 next->base.call_id == TC_CALL_draw_single) {
3018 simplify_draw_info(&first->info);
3019
3020 if (is_next_call_a_mergeable_draw(first, next)) {
3021 /* The maximum number of merged draws is given by the batch size. */
3022 struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3023 unsigned num_draws = 2;
3024 bool index_bias_varies = first->index_bias != next->index_bias;
3025
3026 /* u_threaded_context stores start/count in min/max_index for single draws. */
3027 multi[0].start = first->info.min_index;
3028 multi[0].count = first->info.max_index;
3029 multi[0].index_bias = first->index_bias;
3030 multi[1].start = next->info.min_index;
3031 multi[1].count = next->info.max_index;
3032 multi[1].index_bias = next->index_bias;
3033
3034 /* Find how many other draws can be merged. */
3035 next = get_next_call(next, tc_draw_single);
3036 for (; next != last && is_next_call_a_mergeable_draw(first, next);
3037 next = get_next_call(next, tc_draw_single), num_draws++) {
3038 /* u_threaded_context stores start/count in min/max_index for single draws. */
3039 multi[num_draws].start = next->info.min_index;
3040 multi[num_draws].count = next->info.max_index;
3041 multi[num_draws].index_bias = next->index_bias;
3042 index_bias_varies |= first->index_bias != next->index_bias;
3043 }
3044
3045 first->info.index_bias_varies = index_bias_varies;
3046 pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3047
3048 /* Since all draws use the same index buffer, drop all references at once. */
3049 if (first->info.index_size)
3050 pipe_drop_resource_references(first->info.index.resource, num_draws);
3051
3052 return call_size(tc_draw_single) * num_draws;
3053 }
3054 }
3055
3056 /* u_threaded_context stores start/count in min/max_index for single draws. */
3057 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3058 struct pipe_draw_start_count_bias draw;
3059
3060 draw.start = first->info.min_index;
3061 draw.count = first->info.max_index;
3062 draw.index_bias = first->index_bias;
3063
3064 first->info.index_bounds_valid = false;
3065 first->info.has_user_indices = false;
3066 first->info.take_index_buffer_ownership = false;
3067
3068 pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3069 if (first->info.index_size)
3070 tc_drop_resource_reference(first->info.index.resource);
3071
3072 return call_size(tc_draw_single);
3073 }
3074
3075 struct tc_draw_indirect {
3076 struct tc_call_base base;
3077 struct pipe_draw_start_count_bias draw;
3078 struct pipe_draw_info info;
3079 struct pipe_draw_indirect_info indirect;
3080 };
3081
3082 static uint16_t
3083 tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3084 {
3085 struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3086
3087 info->info.index_bounds_valid = false;
3088 info->info.take_index_buffer_ownership = false;
3089
3090 pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3091 if (info->info.index_size)
3092 tc_drop_resource_reference(info->info.index.resource);
3093
3094 tc_drop_resource_reference(info->indirect.buffer);
3095 tc_drop_resource_reference(info->indirect.indirect_draw_count);
3096 tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3097 return call_size(tc_draw_indirect);
3098 }
3099
3100 struct tc_draw_multi {
3101 struct tc_call_base base;
3102 unsigned num_draws;
3103 struct pipe_draw_info info;
3104 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3105 };
3106
3107 static uint16_t
3108 tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3109 {
3110 struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3111
3112 info->info.has_user_indices = false;
3113 info->info.index_bounds_valid = false;
3114 info->info.take_index_buffer_ownership = false;
3115
3116 pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3117 if (info->info.index_size)
3118 tc_drop_resource_reference(info->info.index.resource);
3119
3120 return info->base.num_slots;
3121 }
3122
3123 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3124 offsetof(struct pipe_draw_info, index)
3125
3126 void
3127 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3128 unsigned drawid_offset,
3129 const struct pipe_draw_indirect_info *indirect,
3130 const struct pipe_draw_start_count_bias *draws,
3131 unsigned num_draws)
3132 {
3133 STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3134 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3135
3136 struct threaded_context *tc = threaded_context(_pipe);
3137 unsigned index_size = info->index_size;
3138 bool has_user_indices = info->has_user_indices;
3139
3140 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3141 tc_add_all_gfx_bindings_to_buffer_list(tc);
3142
3143 if (unlikely(indirect)) {
3144 assert(!has_user_indices);
3145 assert(num_draws == 1);
3146
3147 struct tc_draw_indirect *p =
3148 tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3149 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3150
3151 if (index_size) {
3152 if (!info->take_index_buffer_ownership) {
3153 tc_set_resource_reference(&p->info.index.resource,
3154 info->index.resource);
3155 }
3156 tc_add_to_buffer_list(next, info->index.resource);
3157 }
3158 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3159
3160 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3161 tc_set_resource_reference(&p->indirect.indirect_draw_count,
3162 indirect->indirect_draw_count);
3163 p->indirect.count_from_stream_output = NULL;
3164 pipe_so_target_reference(&p->indirect.count_from_stream_output,
3165 indirect->count_from_stream_output);
3166
3167 if (indirect->buffer)
3168 tc_add_to_buffer_list(next, indirect->buffer);
3169 if (indirect->indirect_draw_count)
3170 tc_add_to_buffer_list(next, indirect->indirect_draw_count);
3171 if (indirect->count_from_stream_output)
3172 tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
3173
3174 memcpy(&p->indirect, indirect, sizeof(*indirect));
3175 p->draw.start = draws[0].start;
3176 return;
3177 }
3178
3179 if (num_draws == 1) {
3180 /* Single draw. */
3181 if (index_size && has_user_indices) {
3182 unsigned size = draws[0].count * index_size;
3183 struct pipe_resource *buffer = NULL;
3184 unsigned offset;
3185
3186 if (!size)
3187 return;
3188
3189          /* This must be done before adding draw_vbo, because the upload could
3190           * trigger e.g. transfer_unmap and a flush, which would submit a
3191           * partially-uninitialized draw_vbo to the driver if done afterwards.
3192           */
3193 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3194 (uint8_t*)info->index.user + draws[0].start * index_size,
3195 &offset, &buffer);
3196 if (unlikely(!buffer))
3197 return;
3198
3199 struct tc_draw_single *p = drawid_offset > 0 ?
3200 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3201 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3202 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3203 p->info.index.resource = buffer;
3204 if (drawid_offset > 0)
3205 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3206 /* u_threaded_context stores start/count in min/max_index for single draws. */
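         /* The uploaded data starts at byte "offset" within "buffer"; convert
          * it to units of indices (e.g. a byte offset of 48 with 2-byte
          * indices yields a start of 24).
          */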
3207 p->info.min_index = offset >> util_logbase2(index_size);
3208 p->info.max_index = draws[0].count;
3209 p->index_bias = draws[0].index_bias;
3210 } else {
3211 /* Non-indexed call or indexed with a real index buffer. */
3212 struct tc_draw_single *p = drawid_offset > 0 ?
3213 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3214 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3215 if (index_size) {
3216 if (!info->take_index_buffer_ownership) {
3217 tc_set_resource_reference(&p->info.index.resource,
3218 info->index.resource);
3219 }
3220 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3221 }
3222 if (drawid_offset > 0)
3223 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3224 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3225 /* u_threaded_context stores start/count in min/max_index for single draws. */
3226 p->info.min_index = draws[0].start;
3227 p->info.max_index = draws[0].count;
3228 p->index_bias = draws[0].index_bias;
3229 }
3230 return;
3231 }
3232
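   /* Slot accounting for splitting a multi draw across batches: a batch holds
    * TC_SLOTS_PER_BATCH slots of sizeof(struct tc_call_base) bytes each, and
    * one tc_draw_multi call needs its header plus one
    * pipe_draw_start_count_bias per draw, so slots_for_one_draw is the minimum
    * number of slots that can hold the header and a single draw.
    */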
3233 const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3234 const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3235 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3236 sizeof(struct tc_call_base));
3237 /* Multi draw. */
3238 if (index_size && has_user_indices) {
3239 struct pipe_resource *buffer = NULL;
3240 unsigned buffer_offset, total_count = 0;
3241 unsigned index_size_shift = util_logbase2(index_size);
3242 uint8_t *ptr = NULL;
3243
3244 /* Get the total count. */
3245 for (unsigned i = 0; i < num_draws; i++)
3246 total_count += draws[i].count;
3247
3248 if (!total_count)
3249 return;
3250
3251 /* Allocate space for all index buffers.
3252 *
3253        * This must be done before adding draw_vbo, because the upload could
3254        * trigger e.g. transfer_unmap and a flush, which would submit a
3255        * partially-uninitialized draw_vbo to the driver if done afterwards.
3256 */
3257 u_upload_alloc(tc->base.stream_uploader, 0,
3258 total_count << index_size_shift, 4,
3259 &buffer_offset, &buffer, (void**)&ptr);
3260 if (unlikely(!buffer))
3261 return;
3262
3263 int total_offset = 0;
3264 while (num_draws) {
3265 struct tc_batch *next = &tc->batch_slots[tc->next];
3266
3267 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3268          /* If there isn't room for even one draw, a fresh batch will be used, so assume a full batch of slots. */
3269 if (nb_slots_left < slots_for_one_draw)
3270 nb_slots_left = TC_SLOTS_PER_BATCH;
3271 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3272
3273 /* How many draws can we fit in the current batch */
3274 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3275
3276 struct tc_draw_multi *p =
3277 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3278 dr);
3279 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3280 p->info.index.resource = buffer;
3281 p->num_draws = dr;
3282
3283 /* Upload index buffers. */
3284 for (unsigned i = 0, offset = 0; i < dr; i++) {
3285 unsigned count = draws[i + total_offset].count;
3286
3287 if (!count) {
3288 p->slot[i].start = 0;
3289 p->slot[i].count = 0;
3290 p->slot[i].index_bias = 0;
3291 continue;
3292 }
3293
3294 unsigned size = count << index_size_shift;
3295 memcpy(ptr + offset,
3296 (uint8_t*)info->index.user +
3297 (draws[i + total_offset].start << index_size_shift), size);
3298 p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3299 p->slot[i].count = count;
3300 p->slot[i].index_bias = draws[i + total_offset].index_bias;
3301 offset += size;
3302 }
3303
3304 total_offset += dr;
3305 num_draws -= dr;
3306 }
3307 } else {
3308 int total_offset = 0;
3309 bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3310 while (num_draws) {
3311 struct tc_batch *next = &tc->batch_slots[tc->next];
3312
3313 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3314          /* If there isn't room for even one draw, a fresh batch will be used, so assume a full batch of slots. */
3315 if (nb_slots_left < slots_for_one_draw)
3316 nb_slots_left = TC_SLOTS_PER_BATCH;
3317 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3318
3319 /* How many draws can we fit in the current batch */
3320 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3321
3322 /* Non-indexed call or indexed with a real index buffer. */
3323 struct tc_draw_multi *p =
3324 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3325 dr);
3326 if (index_size) {
3327 if (!take_index_buffer_ownership) {
3328 tc_set_resource_reference(&p->info.index.resource,
3329 info->index.resource);
3330 }
3331 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3332 }
3333 take_index_buffer_ownership = false;
3334 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3335 p->num_draws = dr;
3336 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3337 num_draws -= dr;
3338
3339 total_offset += dr;
3340 }
3341 }
3342 }
3343
3344 struct tc_draw_vstate_single {
3345 struct tc_call_base base;
3346 struct pipe_draw_start_count_bias draw;
3347
3348    /* The following fields must be packed together without holes because
3349     * draw merging compares them with a single memcmp.
3350 */
3351 struct pipe_vertex_state *state;
3352 uint32_t partial_velem_mask;
3353 struct pipe_draw_vertex_state_info info;
3354 };
3355
3356 static bool
3357 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3358 struct tc_draw_vstate_single *next)
3359 {
3360 if (next->base.call_id != TC_CALL_draw_vstate_single)
3361 return false;
3362
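   /* Compare state, partial_velem_mask, and info as one contiguous block;
    * see the layout comment in struct tc_draw_vstate_single above.
    */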
3363 return !memcmp(&first->state, &next->state,
3364 offsetof(struct tc_draw_vstate_single, info) +
3365 sizeof(struct pipe_draw_vertex_state_info) -
3366 offsetof(struct tc_draw_vstate_single, state));
3367 }
3368
3369 static uint16_t
3370 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3371 {
3372 /* Draw call merging. */
3373 struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3374 struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3375 struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3376
3377 /* If at least 2 consecutive draw calls can be merged... */
3378 if (next != last &&
3379 is_next_call_a_mergeable_draw_vstate(first, next)) {
3380 /* The maximum number of merged draws is given by the batch size. */
3381 struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3382 call_size(tc_draw_vstate_single)];
3383 unsigned num_draws = 2;
3384
3385 draws[0] = first->draw;
3386 draws[1] = next->draw;
3387
3388 /* Find how many other draws can be merged. */
3389 next = get_next_call(next, tc_draw_vstate_single);
3390 for (; next != last &&
3391 is_next_call_a_mergeable_draw_vstate(first, next);
3392 next = get_next_call(next, tc_draw_vstate_single),
3393 num_draws++)
3394 draws[num_draws] = next->draw;
3395
3396 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3397 first->info, draws, num_draws);
3398 /* Since all draws use the same state, drop all references at once. */
3399 tc_drop_vertex_state_references(first->state, num_draws);
3400
3401 return call_size(tc_draw_vstate_single) * num_draws;
3402 }
3403
3404 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3405 first->info, &first->draw, 1);
3406 tc_drop_vertex_state_references(first->state, 1);
3407 return call_size(tc_draw_vstate_single);
3408 }
3409
3410 struct tc_draw_vstate_multi {
3411 struct tc_call_base base;
3412 uint32_t partial_velem_mask;
3413 struct pipe_draw_vertex_state_info info;
3414 unsigned num_draws;
3415 struct pipe_vertex_state *state;
3416 struct pipe_draw_start_count_bias slot[0];
3417 };
3418
3419 static uint16_t
3420 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3421 {
3422 struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3423
3424 pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3425 info->info, info->slot, info->num_draws);
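   /* A multi call owns exactly one vertex-state reference, taken when it was
    * recorded in tc_draw_vertex_state, so drop just one here.
    */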
3426 tc_drop_vertex_state_references(info->state, 1);
3427 return info->base.num_slots;
3428 }
3429
3430 static void
3431 tc_draw_vertex_state(struct pipe_context *_pipe,
3432 struct pipe_vertex_state *state,
3433 uint32_t partial_velem_mask,
3434 struct pipe_draw_vertex_state_info info,
3435 const struct pipe_draw_start_count_bias *draws,
3436 unsigned num_draws)
3437 {
3438 struct threaded_context *tc = threaded_context(_pipe);
3439
3440 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3441 tc_add_all_gfx_bindings_to_buffer_list(tc);
3442
3443 if (num_draws == 1) {
3444 /* Single draw. */
3445 struct tc_draw_vstate_single *p =
3446 tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3447 p->partial_velem_mask = partial_velem_mask;
3448 p->draw = draws[0];
3449 p->info.mode = info.mode;
3450 p->info.take_vertex_state_ownership = false;
3451
3452       /* This should always be 0 for simplicity because we assume that
3453 * index_bias doesn't vary.
3454 */
3455 assert(draws[0].index_bias == 0);
3456
3457 if (!info.take_vertex_state_ownership)
3458 tc_set_vertex_state_reference(&p->state, state);
3459 else
3460 p->state = state;
3461 return;
3462 }
3463
3464 const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3465 const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3466 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3467 sizeof(struct tc_call_base));
3468 /* Multi draw. */
3469 int total_offset = 0;
3470 bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3471 while (num_draws) {
3472 struct tc_batch *next = &tc->batch_slots[tc->next];
3473
3474 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3475       /* If there isn't room for even one draw, a fresh batch will be used, so assume a full batch of slots. */
3476 if (nb_slots_left < slots_for_one_draw)
3477 nb_slots_left = TC_SLOTS_PER_BATCH;
3478 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3479
3480 /* How many draws can we fit in the current batch */
3481 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3482
3483 /* Non-indexed call or indexed with a real index buffer. */
3484 struct tc_draw_vstate_multi *p =
3485 tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3486
3487 if (!take_vertex_state_ownership)
3488 tc_set_vertex_state_reference(&p->state, state);
3489 else
3490 p->state = state;
3491
3492 take_vertex_state_ownership = false;
3493 p->partial_velem_mask = partial_velem_mask;
3494 p->info.mode = info.mode;
3495 p->info.take_vertex_state_ownership = false;
3496 p->num_draws = dr;
3497 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3498 num_draws -= dr;
3499
3500 total_offset += dr;
3501 }
3502 }
3503
3504 struct tc_launch_grid_call {
3505 struct tc_call_base base;
3506 struct pipe_grid_info info;
3507 };
3508
3509 static uint16_t
3510 tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3511 {
3512 struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3513
3514 pipe->launch_grid(pipe, p);
3515 tc_drop_resource_reference(p->indirect);
3516 return call_size(tc_launch_grid_call);
3517 }
3518
3519 static void
3520 tc_launch_grid(struct pipe_context *_pipe,
3521 const struct pipe_grid_info *info)
3522 {
3523 struct threaded_context *tc = threaded_context(_pipe);
3524 struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3525 tc_launch_grid_call);
3526 assert(info->input == NULL);
3527
3528 if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3529 tc_add_all_compute_bindings_to_buffer_list(tc);
3530
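   /* Take the indirect-buffer reference first; the memcpy below stores the
    * same pointer value, so the recorded call ends up owning exactly one
    * reference, which tc_call_launch_grid drops after execution.
    */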
3531 tc_set_resource_reference(&p->info.indirect, info->indirect);
3532 memcpy(&p->info, info, sizeof(*info));
3533
3534 if (info->indirect)
3535 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
3536 }
3537
3538 static uint16_t
3539 tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3540 {
3541 struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3542
3543 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3544 p->dstz, p->src, p->src_level, &p->src_box);
3545 tc_drop_resource_reference(p->dst);
3546 tc_drop_resource_reference(p->src);
3547 return call_size(tc_resource_copy_region);
3548 }
3549
3550 static void
3551 tc_resource_copy_region(struct pipe_context *_pipe,
3552 struct pipe_resource *dst, unsigned dst_level,
3553 unsigned dstx, unsigned dsty, unsigned dstz,
3554 struct pipe_resource *src, unsigned src_level,
3555 const struct pipe_box *src_box)
3556 {
3557 struct threaded_context *tc = threaded_context(_pipe);
3558 struct threaded_resource *tdst = threaded_resource(dst);
3559 struct tc_resource_copy_region *p =
3560 tc_add_call(tc, TC_CALL_resource_copy_region,
3561 tc_resource_copy_region);
3562
3563 tc_set_resource_reference(&p->dst, dst);
3564 p->dst_level = dst_level;
3565 p->dstx = dstx;
3566 p->dsty = dsty;
3567 p->dstz = dstz;
3568 tc_set_resource_reference(&p->src, src);
3569 p->src_level = src_level;
3570 p->src_box = *src_box;
3571
3572 if (dst->target == PIPE_BUFFER) {
3573 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3574
3575 tc_add_to_buffer_list(next, src);
3576 tc_add_to_buffer_list(next, dst);
3577
3578 util_range_add(&tdst->b, &tdst->valid_buffer_range,
3579 dstx, dstx + src_box->width);
3580 }
3581 }
3582
3583 struct tc_blit_call {
3584 struct tc_call_base base;
3585 struct pipe_blit_info info;
3586 };
3587
3588 static uint16_t
3589 tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3590 {
3591 struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3592
3593 pipe->blit(pipe, blit);
3594 tc_drop_resource_reference(blit->dst.resource);
3595 tc_drop_resource_reference(blit->src.resource);
3596 return call_size(tc_blit_call);
3597 }
3598
3599 static void
3600 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3601 {
3602 struct threaded_context *tc = threaded_context(_pipe);
3603 struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3604
3605 tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3606 tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3607 memcpy(&blit->info, info, sizeof(*info));
3608 }
3609
3610 struct tc_generate_mipmap {
3611 struct tc_call_base base;
3612 enum pipe_format format;
3613 unsigned base_level;
3614 unsigned last_level;
3615 unsigned first_layer;
3616 unsigned last_layer;
3617 struct pipe_resource *res;
3618 };
3619
3620 static uint16_t
3621 tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3622 {
3623 struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3624 ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3625 p->base_level,
3626 p->last_level,
3627 p->first_layer,
3628 p->last_layer);
3629 assert(result);
3630 tc_drop_resource_reference(p->res);
3631 return call_size(tc_generate_mipmap);
3632 }
3633
3634 static bool
3635 tc_generate_mipmap(struct pipe_context *_pipe,
3636 struct pipe_resource *res,
3637 enum pipe_format format,
3638 unsigned base_level,
3639 unsigned last_level,
3640 unsigned first_layer,
3641 unsigned last_layer)
3642 {
3643 struct threaded_context *tc = threaded_context(_pipe);
3644 struct pipe_context *pipe = tc->pipe;
3645 struct pipe_screen *screen = pipe->screen;
3646 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3647
3648 if (util_format_is_depth_or_stencil(format))
3649 bind = PIPE_BIND_DEPTH_STENCIL;
3650 else
3651 bind = PIPE_BIND_RENDER_TARGET;
3652
3653 if (!screen->is_format_supported(screen, format, res->target,
3654 res->nr_samples, res->nr_storage_samples,
3655 bind))
3656 return false;
3657
3658 struct tc_generate_mipmap *p =
3659 tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3660
3661 tc_set_resource_reference(&p->res, res);
3662 p->format = format;
3663 p->base_level = base_level;
3664 p->last_level = last_level;
3665 p->first_layer = first_layer;
3666 p->last_layer = last_layer;
3667 return true;
3668 }
3669
3670 struct tc_resource_call {
3671 struct tc_call_base base;
3672 struct pipe_resource *resource;
3673 };
3674
3675 static uint16_t
3676 tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3677 {
3678 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3679
3680 pipe->flush_resource(pipe, resource);
3681 tc_drop_resource_reference(resource);
3682 return call_size(tc_resource_call);
3683 }
3684
3685 static void
3686 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3687 {
3688 struct threaded_context *tc = threaded_context(_pipe);
3689 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3690 tc_resource_call);
3691
3692 tc_set_resource_reference(&call->resource, resource);
3693 }
3694
3695 static uint16_t
3696 tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3697 {
3698 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3699
3700 pipe->invalidate_resource(pipe, resource);
3701 tc_drop_resource_reference(resource);
3702 return call_size(tc_resource_call);
3703 }
3704
3705 static void
3706 tc_invalidate_resource(struct pipe_context *_pipe,
3707 struct pipe_resource *resource)
3708 {
3709 struct threaded_context *tc = threaded_context(_pipe);
3710
3711 if (resource->target == PIPE_BUFFER) {
3712 tc_invalidate_buffer(tc, threaded_resource(resource));
3713 return;
3714 }
3715
3716 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3717 tc_resource_call);
3718 tc_set_resource_reference(&call->resource, resource);
3719 }
3720
3721 struct tc_clear {
3722 struct tc_call_base base;
3723 bool scissor_state_set;
3724 uint8_t stencil;
3725 uint16_t buffers;
3726 float depth;
3727 struct pipe_scissor_state scissor_state;
3728 union pipe_color_union color;
3729 };
3730
3731 static uint16_t
3732 tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3733 {
3734 struct tc_clear *p = to_call(call, tc_clear);
3735
3736 pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3737 return call_size(tc_clear);
3738 }
3739
3740 static void
3741 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3742 const union pipe_color_union *color, double depth,
3743 unsigned stencil)
3744 {
3745 struct threaded_context *tc = threaded_context(_pipe);
3746 struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3747
3748 p->buffers = buffers;
3749 if (scissor_state)
3750 p->scissor_state = *scissor_state;
3751 p->scissor_state_set = !!scissor_state;
3752 p->color = *color;
3753 p->depth = depth;
3754 p->stencil = stencil;
3755 }
3756
3757 struct tc_clear_render_target {
3758 struct tc_call_base base;
3759 bool render_condition_enabled;
3760 unsigned dstx;
3761 unsigned dsty;
3762 unsigned width;
3763 unsigned height;
3764 union pipe_color_union color;
3765 struct pipe_surface *dst;
3766 };
3767
3768 static uint16_t
3769 tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3770 {
3771 struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3772
3773 pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3774 p->render_condition_enabled);
3775 tc_drop_surface_reference(p->dst);
3776 return call_size(tc_clear_render_target);
3777 }
3778
3779 static void
3780 tc_clear_render_target(struct pipe_context *_pipe,
3781 struct pipe_surface *dst,
3782 const union pipe_color_union *color,
3783 unsigned dstx, unsigned dsty,
3784 unsigned width, unsigned height,
3785 bool render_condition_enabled)
3786 {
3787 struct threaded_context *tc = threaded_context(_pipe);
3788 struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3789 p->dst = NULL;
3790 pipe_surface_reference(&p->dst, dst);
3791 p->color = *color;
3792 p->dstx = dstx;
3793 p->dsty = dsty;
3794 p->width = width;
3795 p->height = height;
3796 p->render_condition_enabled = render_condition_enabled;
3797 }
3798
3799
3800 struct tc_clear_depth_stencil {
3801 struct tc_call_base base;
3802 bool render_condition_enabled;
3803 float depth;
3804 unsigned clear_flags;
3805 unsigned stencil;
3806 unsigned dstx;
3807 unsigned dsty;
3808 unsigned width;
3809 unsigned height;
3810 struct pipe_surface *dst;
3811 };
3812
3813
3814 static uint16_t
3815 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
3816 {
3817 struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
3818
3819 pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
3820 p->dstx, p->dsty, p->width, p->height,
3821 p->render_condition_enabled);
3822 tc_drop_surface_reference(p->dst);
3823 return call_size(tc_clear_depth_stencil);
3824 }
3825
3826 static void
3827 tc_clear_depth_stencil(struct pipe_context *_pipe,
3828 struct pipe_surface *dst, unsigned clear_flags,
3829 double depth, unsigned stencil, unsigned dstx,
3830 unsigned dsty, unsigned width, unsigned height,
3831 bool render_condition_enabled)
3832 {
3833 struct threaded_context *tc = threaded_context(_pipe);
3834 struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
3835 p->dst = NULL;
3836 pipe_surface_reference(&p->dst, dst);
3837 p->clear_flags = clear_flags;
3838 p->depth = depth;
3839 p->stencil = stencil;
3840 p->dstx = dstx;
3841 p->dsty = dsty;
3842 p->width = width;
3843 p->height = height;
3844 p->render_condition_enabled = render_condition_enabled;
3845 }
3846
3847 struct tc_clear_buffer {
3848 struct tc_call_base base;
3849 uint8_t clear_value_size;
3850 unsigned offset;
3851 unsigned size;
3852 char clear_value[16];
3853 struct pipe_resource *res;
3854 };
3855
3856 static uint16_t
3857 tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
3858 {
3859 struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
3860
3861 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
3862 p->clear_value_size);
3863 tc_drop_resource_reference(p->res);
3864 return call_size(tc_clear_buffer);
3865 }
3866
3867 static void
3868 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
3869 unsigned offset, unsigned size,
3870 const void *clear_value, int clear_value_size)
3871 {
3872 struct threaded_context *tc = threaded_context(_pipe);
3873 struct threaded_resource *tres = threaded_resource(res);
3874 struct tc_clear_buffer *p =
3875 tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
3876
3877 tc_set_resource_reference(&p->res, res);
3878 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
3879 p->offset = offset;
3880 p->size = size;
3881 memcpy(p->clear_value, clear_value, clear_value_size);
3882 p->clear_value_size = clear_value_size;
3883
3884 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3885 }
3886
3887 struct tc_clear_texture {
3888 struct tc_call_base base;
3889 unsigned level;
3890 struct pipe_box box;
3891 char data[16];
3892 struct pipe_resource *res;
3893 };
3894
3895 static uint16_t
3896 tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
3897 {
3898 struct tc_clear_texture *p = to_call(call, tc_clear_texture);
3899
3900 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
3901 tc_drop_resource_reference(p->res);
3902 return call_size(tc_clear_texture);
3903 }
3904
3905 static void
3906 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
3907 unsigned level, const struct pipe_box *box, const void *data)
3908 {
3909 struct threaded_context *tc = threaded_context(_pipe);
3910 struct tc_clear_texture *p =
3911 tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
3912
3913 tc_set_resource_reference(&p->res, res);
3914 p->level = level;
3915 p->box = *box;
3916 memcpy(p->data, data,
3917 util_format_get_blocksize(res->format));
3918 }
3919
3920 struct tc_resource_commit {
3921 struct tc_call_base base;
3922 bool commit;
3923 unsigned level;
3924 struct pipe_box box;
3925 struct pipe_resource *res;
3926 };
3927
3928 static uint16_t
3929 tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
3930 {
3931 struct tc_resource_commit *p = to_call(call, tc_resource_commit);
3932
3933 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
3934 tc_drop_resource_reference(p->res);
3935 return call_size(tc_resource_commit);
3936 }
3937
3938 static bool
3939 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
3940 unsigned level, struct pipe_box *box, bool commit)
3941 {
3942 struct threaded_context *tc = threaded_context(_pipe);
3943 struct tc_resource_commit *p =
3944 tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
3945
3946 tc_set_resource_reference(&p->res, res);
3947 p->level = level;
3948 p->box = *box;
3949 p->commit = commit;
3950 return true; /* we don't care about the return value for this call */
3951 }
3952
3953 static unsigned
3954 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
3955 {
3956 struct threaded_context *tc = threaded_context(_pipe);
3957 struct pipe_context *pipe = tc->pipe;
3958
3959 return pipe->init_intel_perf_query_info(pipe);
3960 }
3961
3962 static void
3963 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
3964 unsigned query_index,
3965 const char **name,
3966 uint32_t *data_size,
3967 uint32_t *n_counters,
3968 uint32_t *n_active)
3969 {
3970 struct threaded_context *tc = threaded_context(_pipe);
3971 struct pipe_context *pipe = tc->pipe;
3972
3973 tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
3974 pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
3975 n_counters, n_active);
3976 }
3977
3978 static void
3979 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
3980 unsigned query_index,
3981 unsigned counter_index,
3982 const char **name,
3983 const char **desc,
3984 uint32_t *offset,
3985 uint32_t *data_size,
3986 uint32_t *type_enum,
3987 uint32_t *data_type_enum,
3988 uint64_t *raw_max)
3989 {
3990 struct threaded_context *tc = threaded_context(_pipe);
3991 struct pipe_context *pipe = tc->pipe;
3992
3993 pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
3994 name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
3995 }
3996
3997 static struct pipe_query *
3998 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
3999 {
4000 struct threaded_context *tc = threaded_context(_pipe);
4001 struct pipe_context *pipe = tc->pipe;
4002
4003 return pipe->new_intel_perf_query_obj(pipe, query_index);
4004 }
4005
4006 static uint16_t
4007 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4008 {
4009 (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4010 return call_size(tc_query_call);
4011 }
4012
4013 static bool
4014 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4015 {
4016 struct threaded_context *tc = threaded_context(_pipe);
4017
4018 tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4019
4020    /* Assume success; a failure to begin can be signaled later from get_intel_perf_query_data. */
4021 return true;
4022 }
4023
4024 static uint16_t
4025 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4026 {
4027 pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4028 return call_size(tc_query_call);
4029 }
4030
4031 static void
4032 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4033 {
4034 struct threaded_context *tc = threaded_context(_pipe);
4035
4036 tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4037 }
4038
4039 static void
4040 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4041 {
4042 struct threaded_context *tc = threaded_context(_pipe);
4043 struct pipe_context *pipe = tc->pipe;
4044
4045 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4046 pipe->delete_intel_perf_query(pipe, q);
4047 }
4048
4049 static void
4050 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4051 {
4052 struct threaded_context *tc = threaded_context(_pipe);
4053 struct pipe_context *pipe = tc->pipe;
4054
4055 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4056 pipe->wait_intel_perf_query(pipe, q);
4057 }
4058
4059 static bool
4060 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4061 {
4062 struct threaded_context *tc = threaded_context(_pipe);
4063 struct pipe_context *pipe = tc->pipe;
4064
4065 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4066 return pipe->is_intel_perf_query_ready(pipe, q);
4067 }
4068
4069 static bool
4070 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4071 struct pipe_query *q,
4072 size_t data_size,
4073 uint32_t *data,
4074 uint32_t *bytes_written)
4075 {
4076 struct threaded_context *tc = threaded_context(_pipe);
4077 struct pipe_context *pipe = tc->pipe;
4078
4079 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4080 return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4081 }
4082
4083 /********************************************************************
4084 * callback
4085 */
4086
4087 struct tc_callback_call {
4088 struct tc_call_base base;
4089 void (*fn)(void *data);
4090 void *data;
4091 };
4092
4093 static uint16_t
4094 tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4095 {
4096 struct tc_callback_call *p = to_call(call, tc_callback_call);
4097
4098 p->fn(p->data);
4099 return call_size(tc_callback_call);
4100 }
4101
4102 static void
4103 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4104 bool asap)
4105 {
4106 struct threaded_context *tc = threaded_context(_pipe);
4107
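   /* If the context is currently synchronous, an "asap" callback can run right
    * away on this thread; otherwise it is recorded and runs when the batch is
    * executed on the driver thread.
    */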
4108 if (asap && tc_is_sync(tc)) {
4109 fn(data);
4110 return;
4111 }
4112
4113 struct tc_callback_call *p =
4114 tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4115 p->fn = fn;
4116 p->data = data;
4117 }
4118
4119
4120 /********************************************************************
4121 * create & destroy
4122 */
4123
4124 static void
4125 tc_destroy(struct pipe_context *_pipe)
4126 {
4127 struct threaded_context *tc = threaded_context(_pipe);
4128 struct pipe_context *pipe = tc->pipe;
4129
4130 if (tc->base.const_uploader &&
4131 tc->base.stream_uploader != tc->base.const_uploader)
4132 u_upload_destroy(tc->base.const_uploader);
4133
4134 if (tc->base.stream_uploader)
4135 u_upload_destroy(tc->base.stream_uploader);
4136
4137 tc_sync(tc);
4138
4139 if (util_queue_is_initialized(&tc->queue)) {
4140 util_queue_destroy(&tc->queue);
4141
4142 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4143 util_queue_fence_destroy(&tc->batch_slots[i].fence);
4144 assert(!tc->batch_slots[i].token);
4145 }
4146 }
4147
4148 slab_destroy_child(&tc->pool_transfers);
4149 assert(tc->batch_slots[tc->next].num_total_slots == 0);
4150 pipe->destroy(pipe);
4151
4152 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
4153 if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
4154 util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
4155 util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
4156 }
4157
4158 FREE(tc);
4159 }
4160
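/* Execution dispatch table: one entry per TC_CALL_* id, generated from
 * u_threaded_context_calls.h so that its order matches enum tc_call_id.
 */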
4161 static const tc_execute execute_func[TC_NUM_CALLS] = {
4162 #define CALL(name) tc_call_##name,
4163 #include "u_threaded_context_calls.h"
4164 #undef CALL
4165 };
4166
4167 void tc_driver_internal_flush_notify(struct threaded_context *tc)
4168 {
4169 /* Allow drivers to call this function even for internal contexts that
4170 * don't have tc. It simplifies drivers.
4171 */
4172 if (!tc)
4173 return;
4174
4175 /* Signal fences set by tc_batch_execute. */
4176 for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4177 util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4178
4179 tc->num_signal_fences_next_flush = 0;
4180 }
4181
4182 /**
4183 * Wrap an existing pipe_context into a threaded_context.
4184 *
4185 * \param pipe pipe_context to wrap
4186  * \param parent_transfer_pool  parent slab pool set up for creating
4187  *                              pipe_transfer objects; the driver should
4188  *                              have one in pipe_screen.
4189 * \param replace_buffer callback for replacing a pipe_resource's storage
4190 * with another pipe_resource's storage.
4191 * \param options optional TC options/callbacks
4192 * \param out if successful, the threaded_context will be returned here in
4193 * addition to the return value if "out" != NULL
4194 */
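/* A minimal usage sketch (the "mydrv_*" names are illustrative, not part of
 * this API):
 *
 *    struct threaded_context *tc = NULL;
 *    struct pipe_context *ctx =
 *       threaded_context_create(mydrv_ctx, &mydrv_screen->transfer_pool,
 *                               mydrv_replace_buffer_storage, NULL, &tc);
 *
 * The return value is either the threaded wrapper or the original context
 * (e.g. when GALLIUM_THREAD disables threading), so callers should use the
 * returned pipe_context from then on.
 */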
4195 struct pipe_context *
4196 threaded_context_create(struct pipe_context *pipe,
4197 struct slab_parent_pool *parent_transfer_pool,
4198 tc_replace_buffer_storage_func replace_buffer,
4199 const struct threaded_context_options *options,
4200 struct threaded_context **out)
4201 {
4202 struct threaded_context *tc;
4203
4204 if (!pipe)
4205 return NULL;
4206
4207 util_cpu_detect();
4208
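   /* GALLIUM_THREAD can force threading on or off; by default it is enabled
    * only on systems with more than one CPU.
    */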
4209 if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4210 return pipe;
4211
4212 tc = CALLOC_STRUCT(threaded_context);
4213 if (!tc) {
4214 pipe->destroy(pipe);
4215 return NULL;
4216 }
4217
4218 if (options)
4219 tc->options = *options;
4220
4221 pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4222
4223 /* The driver context isn't wrapped, so set its "priv" to NULL. */
4224 pipe->priv = NULL;
4225
4226 tc->pipe = pipe;
4227 tc->replace_buffer_storage = replace_buffer;
4228 tc->map_buffer_alignment =
4229 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4230 tc->ubo_alignment =
4231 MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4232 tc->base.priv = pipe; /* priv points to the wrapped driver context */
4233 tc->base.screen = pipe->screen;
4234 tc->base.destroy = tc_destroy;
4235 tc->base.callback = tc_callback;
4236
4237 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4238 if (pipe->stream_uploader == pipe->const_uploader)
4239 tc->base.const_uploader = tc->base.stream_uploader;
4240 else
4241 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4242
4243 if (!tc->base.stream_uploader || !tc->base.const_uploader)
4244 goto fail;
4245
4246 tc->use_forced_staging_uploads = true;
4247
4248 /* The queue size is the number of batches "waiting". Batches are removed
4249 * from the queue before being executed, so keep one tc_batch slot for that
4250 * execution. Also, keep one unused slot for an unflushed batch.
4251 */
4252 if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4253 goto fail;
4254
4255 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4256 #if !defined(NDEBUG) && TC_DEBUG >= 1
4257 tc->batch_slots[i].sentinel = TC_SENTINEL;
4258 #endif
4259 tc->batch_slots[i].tc = tc;
4260 util_queue_fence_init(&tc->batch_slots[i].fence);
4261 }
4262 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4263 util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4264
4265 list_inithead(&tc->unflushed_queries);
4266
4267 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4268
4269 /* If you have different limits in each shader stage, set the maximum. */
4270    struct pipe_screen *screen = pipe->screen;
4271 tc->max_vertex_buffers =
4272 screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4273 tc->max_const_buffers =
4274 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4275 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4276 tc->max_shader_buffers =
4277 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4278 PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4279 tc->max_images =
4280 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4281 PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4282 tc->max_samplers =
4283 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4284 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4285
4286 tc->base.set_context_param = tc_set_context_param; /* always set this */
4287
4288 #define CTX_INIT(_member) \
4289 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
4290
4291 CTX_INIT(flush);
4292 CTX_INIT(draw_vbo);
4293 CTX_INIT(draw_vertex_state);
4294 CTX_INIT(launch_grid);
4295 CTX_INIT(resource_copy_region);
4296 CTX_INIT(blit);
4297 CTX_INIT(clear);
4298 CTX_INIT(clear_render_target);
4299 CTX_INIT(clear_depth_stencil);
4300 CTX_INIT(clear_buffer);
4301 CTX_INIT(clear_texture);
4302 CTX_INIT(flush_resource);
4303 CTX_INIT(generate_mipmap);
4304 CTX_INIT(render_condition);
4305 CTX_INIT(create_query);
4306 CTX_INIT(create_batch_query);
4307 CTX_INIT(destroy_query);
4308 CTX_INIT(begin_query);
4309 CTX_INIT(end_query);
4310 CTX_INIT(get_query_result);
4311 CTX_INIT(get_query_result_resource);
4312 CTX_INIT(set_active_query_state);
4313 CTX_INIT(create_blend_state);
4314 CTX_INIT(bind_blend_state);
4315 CTX_INIT(delete_blend_state);
4316 CTX_INIT(create_sampler_state);
4317 CTX_INIT(bind_sampler_states);
4318 CTX_INIT(delete_sampler_state);
4319 CTX_INIT(create_rasterizer_state);
4320 CTX_INIT(bind_rasterizer_state);
4321 CTX_INIT(delete_rasterizer_state);
4322 CTX_INIT(create_depth_stencil_alpha_state);
4323 CTX_INIT(bind_depth_stencil_alpha_state);
4324 CTX_INIT(delete_depth_stencil_alpha_state);
4325 CTX_INIT(create_fs_state);
4326 CTX_INIT(bind_fs_state);
4327 CTX_INIT(delete_fs_state);
4328 CTX_INIT(create_vs_state);
4329 CTX_INIT(bind_vs_state);
4330 CTX_INIT(delete_vs_state);
4331 CTX_INIT(create_gs_state);
4332 CTX_INIT(bind_gs_state);
4333 CTX_INIT(delete_gs_state);
4334 CTX_INIT(create_tcs_state);
4335 CTX_INIT(bind_tcs_state);
4336 CTX_INIT(delete_tcs_state);
4337 CTX_INIT(create_tes_state);
4338 CTX_INIT(bind_tes_state);
4339 CTX_INIT(delete_tes_state);
4340 CTX_INIT(create_compute_state);
4341 CTX_INIT(bind_compute_state);
4342 CTX_INIT(delete_compute_state);
4343 CTX_INIT(create_vertex_elements_state);
4344 CTX_INIT(bind_vertex_elements_state);
4345 CTX_INIT(delete_vertex_elements_state);
4346 CTX_INIT(set_blend_color);
4347 CTX_INIT(set_stencil_ref);
4348 CTX_INIT(set_sample_mask);
4349 CTX_INIT(set_min_samples);
4350 CTX_INIT(set_clip_state);
4351 CTX_INIT(set_constant_buffer);
4352 CTX_INIT(set_inlinable_constants);
4353 CTX_INIT(set_framebuffer_state);
4354 CTX_INIT(set_polygon_stipple);
4355 CTX_INIT(set_sample_locations);
4356 CTX_INIT(set_scissor_states);
4357 CTX_INIT(set_viewport_states);
4358 CTX_INIT(set_window_rectangles);
4359 CTX_INIT(set_sampler_views);
4360 CTX_INIT(set_tess_state);
4361 CTX_INIT(set_patch_vertices);
4362 CTX_INIT(set_shader_buffers);
4363 CTX_INIT(set_shader_images);
4364 CTX_INIT(set_vertex_buffers);
4365 CTX_INIT(create_stream_output_target);
4366 CTX_INIT(stream_output_target_destroy);
4367 CTX_INIT(set_stream_output_targets);
4368 CTX_INIT(create_sampler_view);
4369 CTX_INIT(sampler_view_destroy);
4370 CTX_INIT(create_surface);
4371 CTX_INIT(surface_destroy);
4372 CTX_INIT(buffer_map);
4373 CTX_INIT(texture_map);
4374 CTX_INIT(transfer_flush_region);
4375 CTX_INIT(buffer_unmap);
4376 CTX_INIT(texture_unmap);
4377 CTX_INIT(buffer_subdata);
4378 CTX_INIT(texture_subdata);
4379 CTX_INIT(texture_barrier);
4380 CTX_INIT(memory_barrier);
4381 CTX_INIT(resource_commit);
4382 CTX_INIT(create_video_codec);
4383 CTX_INIT(create_video_buffer);
4384 CTX_INIT(set_compute_resources);
4385 CTX_INIT(set_global_binding);
4386 CTX_INIT(get_sample_position);
4387 CTX_INIT(invalidate_resource);
4388 CTX_INIT(get_device_reset_status);
4389 CTX_INIT(set_device_reset_callback);
4390 CTX_INIT(dump_debug_state);
4391 CTX_INIT(set_log_context);
4392 CTX_INIT(emit_string_marker);
4393 CTX_INIT(set_debug_callback);
4394 CTX_INIT(create_fence_fd);
4395 CTX_INIT(fence_server_sync);
4396 CTX_INIT(fence_server_signal);
4397 CTX_INIT(get_timestamp);
4398 CTX_INIT(create_texture_handle);
4399 CTX_INIT(delete_texture_handle);
4400 CTX_INIT(make_texture_handle_resident);
4401 CTX_INIT(create_image_handle);
4402 CTX_INIT(delete_image_handle);
4403 CTX_INIT(make_image_handle_resident);
4404 CTX_INIT(set_frontend_noop);
4405 CTX_INIT(init_intel_perf_query_info);
4406 CTX_INIT(get_intel_perf_query_info);
4407 CTX_INIT(get_intel_perf_query_counter_info);
4408 CTX_INIT(new_intel_perf_query_obj);
4409 CTX_INIT(begin_intel_perf_query);
4410 CTX_INIT(end_intel_perf_query);
4411 CTX_INIT(delete_intel_perf_query);
4412 CTX_INIT(wait_intel_perf_query);
4413 CTX_INIT(is_intel_perf_query_ready);
4414 CTX_INIT(get_intel_perf_query_data);
4415 #undef CTX_INIT
4416
4417 if (out)
4418 *out = tc;
4419
4420 tc_begin_next_buffer_list(tc);
4421 return &tc->base;
4422
4423 fail:
4424 tc_destroy(&tc->base);
4425 return NULL;
4426 }
4427
4428 void
4429 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4430 {
4431 uint64_t total_ram;
4432 if (os_get_total_physical_memory(&total_ram)) {
4433 tc->bytes_mapped_limit = total_ram / divisor;
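      /* On 32-bit builds, cap the limit, presumably so that outstanding
       * mappings cannot exhaust the limited virtual address space.
       */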
4434 if (sizeof(void*) == 4)
4435 tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4436 }
4437 }
4438