/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_threaded_context.h"
#include "util/u_cpu_detect.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "driver_trace/tr_context.h"
#include "util/log.h"
#include "compiler/shader_info.h"

#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

#if TC_DEBUG >= 2
#define tc_printf mesa_logi
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif

#define TC_SENTINEL 0x5ca1ab1e

enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

#if TC_DEBUG >= 3
static const char *tc_call_names[] = {
#define CALL(name) #name,
#include "u_threaded_context_calls.h"
#undef CALL
};
#endif

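/* Every queued call begins with a tc_call_base header. execute_func maps a
 * tc_call_id to the handler that replays the call on the driver thread; each
 * handler returns the number of 64-bit slots the call occupied, which is how
 * tc_batch_execute advances through a batch.
 */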
typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);

static const tc_execute execute_func[TC_NUM_CALLS];

static void
tc_buffer_subdata(struct pipe_context *_pipe,
                  struct pipe_resource *resource,
                  unsigned usage, unsigned offset,
                  unsigned size, const void *data);

static void
tc_batch_check(UNUSED struct tc_batch *batch)
{
   tc_assert(batch->sentinel == TC_SENTINEL);
   tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
}

static void
tc_debug_check(struct threaded_context *tc)
{
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      tc_batch_check(&tc->batch_slots[i]);
      tc_assert(tc->batch_slots[i].tc == tc);
   }
}

static void
tc_set_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   tc->driver_thread = util_get_thread_id();
#endif
}

static void
tc_clear_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
#endif
}

static void *
to_call_check(void *ptr, unsigned num_slots)
{
#if TC_DEBUG >= 1
   struct tc_call_base *call = ptr;
   tc_assert(call->num_slots == num_slots);
#endif
   return ptr;
}
#define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))

#define size_to_slots(size)      DIV_ROUND_UP(size, 8)
#define call_size(type)          size_to_slots(sizeof(struct type))
#define call_size_with_slots(type, num_slots) size_to_slots( \
   sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
#define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
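
/* Sizing is always expressed in 64-bit slots. For illustration only: a call
 * struct of 20 bytes occupies size_to_slots(20) = DIV_ROUND_UP(20, 8) = 3
 * slots, and a hypothetical call with an 8-byte header plus four 8-byte
 * slot[] entries occupies DIV_ROUND_UP(8 + 4 * 8, 8) = 5 slots.
 */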

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
                              struct pipe_vertex_state *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_resource_reference(struct pipe_resource *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      pipe_resource_destroy(dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_surface_reference(struct pipe_surface *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->surface_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->stream_output_target_destroy(dst->context, dst);
}

/**
 * Subtract the given number of references.
 */
static inline void
tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
{
   int count = p_atomic_add_return(&dst->reference.count, -num_refs);

   assert(count >= 0);
   /* Underflows shouldn't happen, but let's be safe. */
   if (count <= 0)
      dst->screen->vertex_state_destroy(dst->screen, dst);
}

/* We don't want to read or write min_index and max_index, because
 * they shouldn't be needed by drivers at this point.
 */
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)

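/* Execute all queued calls of a batch in order. This normally runs on the
 * driver thread via util_queue, but _tc_sync also calls it directly on the
 * producer thread. "last" points one past the final used slot of the batch
 * and is passed through to every call handler.
 */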
static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;

      tc_assert(call->sentinel == TC_SENTINEL);

#if TC_DEBUG >= 3
      tc_printf("CALL: %s", tc_call_names[call->call_id]);
#endif

      iter += execute_func[call->call_id](pipe, call, last);
   }

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
    * an unflushed command buffer.
    */
   struct threaded_context *tc = batch->tc;
   struct util_queue_fence *fence =
      &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;

   if (tc->options.driver_calls_flush_notify) {
      tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;

      /* Since our buffer lists are chained as a ring, we need to flush
       * the context twice as we go around the ring to make the driver signal
       * the buffer list fences, so that the producer thread can reuse the buffer
       * list structures for the next batches without waiting.
       */
      unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
      if (batch->buffer_list_index % half_ring == half_ring - 1)
         pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
   } else {
      util_queue_fence_signal(fence);
   }

   tc_clear_driver_thread(batch->tc);
   tc_batch_check(batch);
   batch->num_total_slots = 0;
}

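/* Advance to the next buffer list in the ring and prepare it for the new
 * (empty) batch: reset its driver-flushed fence, clear its buffer bitset,
 * and request that the first draw/compute call of the batch re-adds all
 * inherited bindings.
 */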
static void
tc_begin_next_buffer_list(struct threaded_context *tc)
{
   tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;

   tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;

   /* Clear the buffer list in the new empty batch. */
   struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
   assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
   util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
   BITSET_ZERO(buf_list->buffer_list);

   tc->add_all_gfx_bindings_to_buffer_list = true;
   tc->add_all_compute_bindings_to_buffer_list = true;
}

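/* Hand the current batch over to the worker thread and move on to the next
 * batch slot and buffer list. Called when a batch runs out of slots or when
 * a flush is requested.
 */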
static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   tc->bytes_mapped_estimate = 0;
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);
}

/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space there.
 * All other higher-level "add" functions use it.
 */
static void *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned num_slots)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   assert(num_slots <= TC_SLOTS_PER_BATCH);
   tc_debug_check(tc);

   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
   next->num_total_slots += num_slots;

#if !defined(NDEBUG) && TC_DEBUG >= 1
   call->sentinel = TC_SENTINEL;
#endif
   call->call_id = id;
   call->num_slots = num_slots;

#if TC_DEBUG >= 3
   tc_printf("ENQUEUE: %s", tc_call_names[id]);
#endif

   tc_debug_check(tc);
   return call;
}

#define tc_add_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))

#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    call_size_with_slots(type, num_slots)))

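/* Return true if the producer and driver threads are fully synchronized:
 * the last submitted batch has finished and the current batch is empty.
 */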
static bool
tc_is_sync(struct threaded_context *tc)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];

   return util_queue_fence_is_signalled(&last->fence) &&
          !next->num_total_slots;
}

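/* Synchronize with the driver thread: wait for the last submitted batch to
 * finish and execute any not-yet-submitted calls of the current batch
 * directly on this thread. "info" and "func" are only used for debug output.
 */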
static void
_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];
   bool synced = false;

   tc_debug_check(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   tc_debug_check(tc);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   /* .. and execute unflushed calls directly. */
   if (next->num_total_slots) {
      p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
      tc->bytes_mapped_estimate = 0;
      tc_batch_execute(next, NULL, 0);
      tc_begin_next_buffer_list(tc);
      synced = true;
   }

   if (synced) {
      p_atomic_inc(&tc->num_syncs);

      if (tc_strcmp(func, "tc_destroy") != 0) {
         tc_printf("sync %s %s", func, info);
      }
   }

   tc_debug_check(tc);
}

#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)

/**
 * Call this from fence_finish for same-context fence waits of deferred fences
 * that haven't been flushed yet.
 *
 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
 * i.e., the wrapped one.
 */
void
threaded_context_flush(struct pipe_context *_pipe,
                       struct tc_unflushed_batch_token *token,
                       bool prefer_async)
{
   struct threaded_context *tc = threaded_context(_pipe);

   /* This is called from the gallium frontend / application thread. */
   if (token->tc && token->tc == tc) {
      struct tc_batch *last = &tc->batch_slots[tc->last];

      /* Prefer to do the flush in the driver thread if it is already
       * running. That should be better for cache locality.
       */
      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
         tc_batch_flush(tc);
      else
         tc_sync(token->tc);
   }
}

/* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */
static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf)
{
   const struct threaded_context *first_user = buf->first_user;

   /* Fast path exit to avoid additional branches */
   if (likely(first_user == tc))
      return;

   if (!first_user)
      first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc);

   /* The NULL check might seem unnecessary here but it's actually critical:
    * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is
    * equivalent to "we're the first user" here. (It's equally important not
    * to ignore the result of the cmpxchg above, since it might fail.)
    * Without the NULL check, we'd set the flag unconditionally, which is bad.
    */
   if (first_user && first_user != tc && !buf->used_by_multiple_contexts)
      buf->used_by_multiple_contexts = true;
}

static bool tc_is_buffer_shared(struct threaded_resource *buf)
{
   return buf->is_shared || buf->used_by_multiple_contexts;
}

static void
tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Set a buffer binding and add it to the buffer list. */
static void
tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   *binding = id;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Reset a buffer binding. */
static void
tc_unbind_buffer(uint32_t *binding)
{
   *binding = 0;
}

/* Reset a range of buffer binding slots. */
static void
tc_unbind_buffers(uint32_t *binding, unsigned count)
{
   if (count)
      memset(binding, 0, sizeof(*binding) * count);
}

static void
tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
                               unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      if (bindings[i])
         BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
   }
}

static unsigned
tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
                   unsigned count)
{
   unsigned rebind_count = 0;

   for (unsigned i = 0; i < count; i++) {
      if (bindings[i] == old_id) {
         bindings[i] = new_id;
         rebind_count++;
      }
   }
   return rebind_count;
}

static void
tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
                                      BITSET_WORD *buffer_list,
                                      enum pipe_shader_type shader)
{
   tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
                                  tc->max_const_buffers);
   if (tc->seen_shader_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
                                     tc->max_shader_buffers);
   }
   if (tc->seen_image_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
                                     tc->max_images);
   }
   if (tc->seen_sampler_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
                                     tc->max_samplers);
   }
}

static unsigned
tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
                          uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
{
   unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;

   ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
                            tc->max_const_buffers);
   if (ubo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
   if (tc->seen_shader_buffers[shader]) {
      ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
                                tc->max_shader_buffers);
      if (ssbo)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
   }
   if (tc->seen_image_buffers[shader]) {
      img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
                               tc->max_images);
      if (img)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
   }
   if (tc->seen_sampler_buffers[shader]) {
      sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
                                   tc->max_samplers);
      if (sampler)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
   }
   return ubo + ssbo + img + sampler;
}

/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
 * This is called by the first draw call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
   if (tc->seen_streamout_buffers)
      tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);

   if (tc->seen_tcs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
   if (tc->seen_tes)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
   if (tc->seen_gs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);

   tc->add_all_gfx_bindings_to_buffer_list = false;
}

/* Add all bound buffers used by compute to the buffer list.
 * This is called by the first compute call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
   tc->add_all_compute_bindings_to_buffer_list = false;
}

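/* Replace old_id with new_id in all buffer binding slots (typically after a
 * buffer's storage has been replaced) and report the affected binding
 * categories in rebind_mask. Returns the number of rebound slots.
 */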
static unsigned
tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
{
   unsigned vbo = 0, so = 0;

   vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
                            tc->max_vertex_buffers);
   if (vbo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);

   if (tc->seen_streamout_buffers) {
      so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
                              PIPE_MAX_SO_BUFFERS);
      if (so)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
   }
   unsigned rebound = vbo + so;

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);

   if (tc->seen_tcs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
   if (tc->seen_tes)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
   if (tc->seen_gs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);

   if (rebound)
      BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
   return rebound;
}

static bool
tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
{
   while (binding_mask) {
      if (bindings[u_bit_scan(&binding_mask)] == id)
         return true;
   }
   return false;
}

static bool
tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
                                    enum pipe_shader_type shader)
{
   if (tc->seen_shader_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
                                    tc->shader_buffers_writeable_mask[shader]))
      return true;

   if (tc->seen_image_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
                                    tc->image_buffers_writeable_mask[shader]))
      return true;

   return false;
}

static bool
tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
{
   if (tc->seen_streamout_buffers &&
       tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
                                    BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
      return true;

   if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
      return true;

   if (tc->seen_tcs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
      return true;

   if (tc->seen_tes &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
      return true;

   if (tc->seen_gs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
      return true;

   return false;
}

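/* Conservatively determine whether a buffer may still be in use. A buffer
 * referenced by any unflushed batch is always reported as busy; otherwise
 * the driver's is_resource_busy callback decides (and busy is assumed if
 * the driver doesn't provide one).
 */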
static bool
tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
                  unsigned map_usage)
{
   if (!tc->options.is_resource_busy)
      return true;

   uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      struct tc_buffer_list *buf_list = &tc->buffer_lists[i];

      /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
       * then the buffer is considered busy. */
      if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
          BITSET_TEST(buf_list->buffer_list, id_hash))
         return true;
   }

   /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
    * this buffer is busy or not. */
   return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
}

/**
 * allow_cpu_storage should be false for user memory and imported buffers.
 */
void
threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
{
   struct threaded_resource *tres = threaded_resource(res);

   tres->first_user = NULL;
   tres->used_by_multiple_contexts = false;
   tres->latest = &tres->b;
   tres->cpu_storage = NULL;
   util_range_init(&tres->valid_buffer_range);
   tres->is_shared = false;
   tres->is_user_ptr = false;
   tres->buffer_id_unique = 0;
   tres->pending_staging_uploads = 0;
   util_range_init(&tres->pending_staging_uploads_range);

   if (allow_cpu_storage &&
       !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                       PIPE_RESOURCE_FLAG_SPARSE |
                       PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
       /* We need buffer invalidation and buffer busyness tracking for the CPU
        * storage, which aren't supported with pipe_vertex_state. */
       !(res->bind & PIPE_BIND_VERTEX_STATE))
      tres->allow_cpu_storage = true;
   else
      tres->allow_cpu_storage = false;
}

void
threaded_resource_deinit(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   if (tres->latest != &tres->b)
      pipe_resource_reference(&tres->latest, NULL);
   util_range_destroy(&tres->valid_buffer_range);
   util_range_destroy(&tres->pending_staging_uploads_range);
   align_free(tres->cpu_storage);
}

struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}


/********************************************************************
 * simple functions
 */

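/* TC_FUNC1 generates a pipe_context entry point that queues a single-value
 * state change. It expands to the call payload struct, the driver-thread
 * handler, and the frontend-facing tc_* wrapper that copies the argument
 * into the current batch.
 */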
#define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
   struct tc_call_##func { \
      struct tc_call_base base; \
      type state; \
   }; \
   \
   static uint16_t \
   tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
   { \
      pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
      return call_size(tc_call_##func); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct tc_call_##func *p = (struct tc_call_##func*) \
                                 tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
      p->state = deref(param); \
      __VA_ARGS__; \
   }

TC_FUNC1(set_active_query_state, , bool, , )

TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
TC_FUNC1(set_sample_mask, , unsigned, , )
TC_FUNC1(set_min_samples, , unsigned, , )
TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)

TC_FUNC1(texture_barrier, , unsigned, , )
TC_FUNC1(memory_barrier, , unsigned, , )
TC_FUNC1(delete_texture_handle, , uint64_t, , )
TC_FUNC1(delete_image_handle, , uint64_t, , )
TC_FUNC1(set_frontend_noop, , bool, , )


/********************************************************************
 * queries
 */

static struct pipe_query *
tc_create_query(struct pipe_context *_pipe, unsigned query_type,
                unsigned index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_query(pipe, query_type, index);
}

static struct pipe_query *
tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
                      unsigned *query_types)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_batch_query(pipe, num_queries, query_types);
}

struct tc_query_call {
   struct tc_call_base base;
   struct pipe_query *query;
};

static uint16_t
tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_query *query = to_call(call, tc_query_call)->query;
   struct threaded_query *tq = threaded_query(query);

   if (list_is_linked(&tq->head_unflushed))
      list_del(&tq->head_unflushed);

   pipe->destroy_query(pipe, query);
   return call_size(tc_query_call);
}

static void
tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
}

static uint16_t
tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static bool
tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
   return true; /* we don't care about the return value for this call */
}

struct tc_end_query_call {
   struct tc_call_base base;
   struct threaded_context *tc;
   struct pipe_query *query;
};

static uint16_t
tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_end_query_call *p = to_call(call, tc_end_query_call);
   struct threaded_query *tq = threaded_query(p->query);

   if (!list_is_linked(&tq->head_unflushed))
      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);

   pipe->end_query(pipe, p->query);
   return call_size(tc_end_query_call);
}

static bool
tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct tc_end_query_call *call =
      tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);

   call->tc = tc;
   call->query = query;

   tq->flushed = false;

   return true; /* we don't care about the return value for this call */
}

static bool
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, bool wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;
   bool flushed = tq->flushed;

   if (!flushed) {
      tc_sync_msg(tc, wait ? "wait" : "nowait");
      tc_set_driver_thread(tc);
   }

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (!flushed)
      tc_clear_driver_thread(tc);

   if (success) {
      tq->flushed = true;
      if (list_is_linked(&tq->head_unflushed)) {
         /* This is safe because it can only happen after we sync'd. */
         list_del(&tq->head_unflushed);
      }
   }
   return success;
}

struct tc_query_result_resource {
   struct tc_call_base base;
   enum pipe_query_flags flags:8;
   enum pipe_query_value_type result_type:8;
   int8_t index; /* it can be -1 */
   unsigned offset;
   struct pipe_query *query;
   struct pipe_resource *resource;
};

static uint16_t
tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);

   pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
                                   p->index, p->resource, p->offset);
   tc_drop_resource_reference(p->resource);
   return call_size(tc_query_result_resource);
}

static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query,
                             enum pipe_query_flags flags,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_buffer_disable_cpu_storage(resource);

   struct tc_query_result_resource *p =
      tc_add_call(tc, TC_CALL_get_query_result_resource,
                  tc_query_result_resource);
   p->query = query;
   p->flags = flags;
   p->result_type = result_type;
   p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
   p->offset = offset;
}

struct tc_render_condition {
   struct tc_call_base base;
   bool condition;
   unsigned mode;
   struct pipe_query *query;
};

static uint16_t
tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_render_condition *p = to_call(call, tc_render_condition);
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
   return call_size(tc_render_condition);
}

static void
tc_render_condition(struct pipe_context *_pipe,
                    struct pipe_query *query, bool condition,
                    enum pipe_render_cond_flag mode)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_render_condition *p =
      tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);

   p->query = query;
   p->condition = condition;
   p->mode = mode;
}


/********************************************************************
 * constant (immutable) states
 */

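/* CSO objects are created synchronously because the frontend needs the
 * returned pointer immediately, while bind and delete are queued through
 * TC_FUNC1. TC_CSO_SHADER_TRACK additionally records that a shader stage
 * has been used, so later calls know which binding arrays to consider.
 */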
#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

#define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )

#define TC_CSO(name, sname, ...) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name, ##__VA_ARGS__) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO(name, name)
#define TC_CSO_SHADER(name) TC_CSO(name, shader)
#define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)

TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_WHOLE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_SHADER(fs)
TC_CSO_SHADER(vs)
TC_CSO_SHADER_TRACK(gs)
TC_CSO_SHADER_TRACK(tcs)
TC_CSO_SHADER_TRACK(tes)
TC_CSO_CREATE(sampler, sampler)
TC_CSO_DELETE(sampler)
TC_CSO_BIND(vertex_elements)
TC_CSO_DELETE(vertex_elements)

static void *
tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
                                const struct pipe_vertex_element *elems)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

   return pipe->create_vertex_elements_state(pipe, count, elems);
}

struct tc_sampler_states {
   struct tc_call_base base;
   ubyte shader, start, count;
   void *slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sampler_states *p = (struct tc_sampler_states *)call;

   pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_bind_sampler_states(struct pipe_context *_pipe,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count, void **states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_states *p =
      tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);

   p->shader = shader;
   p->start = start;
   p->count = count;
   memcpy(p->slot, states, count * sizeof(states[0]));
}

static void
tc_link_shader(struct pipe_context *_pipe, void **shaders)
{
   struct threaded_context *tc = threaded_context(_pipe);
   tc->pipe->link_shader(tc->pipe, shaders);
}
/********************************************************************
 * immediate states
 */

struct tc_framebuffer {
   struct tc_call_base base;
   struct pipe_framebuffer_state state;
};

static uint16_t
tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;

   pipe->set_framebuffer_state(pipe, p);

   unsigned nr_cbufs = p->nr_cbufs;
   for (unsigned i = 0; i < nr_cbufs; i++)
      tc_drop_surface_reference(p->cbufs[i]);
   tc_drop_surface_reference(p->zsbuf);
   return call_size(tc_framebuffer);
}

static void
tc_set_framebuffer_state(struct pipe_context *_pipe,
                         const struct pipe_framebuffer_state *fb)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_framebuffer *p =
      tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
   unsigned nr_cbufs = fb->nr_cbufs;

   p->state.width = fb->width;
   p->state.height = fb->height;
   p->state.samples = fb->samples;
   p->state.layers = fb->layers;
   p->state.nr_cbufs = nr_cbufs;

   for (unsigned i = 0; i < nr_cbufs; i++) {
      p->state.cbufs[i] = NULL;
      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
   }
   p->state.zsbuf = NULL;
   pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
}

struct tc_tess_state {
   struct tc_call_base base;
   float state[6];
};

static uint16_t
tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   float *p = to_call(call, tc_tess_state)->state;

   pipe->set_tess_state(pipe, p, p + 4);
   return call_size(tc_tess_state);
}

static void
tc_set_tess_state(struct pipe_context *_pipe,
                  const float default_outer_level[4],
                  const float default_inner_level[2])
{
   struct threaded_context *tc = threaded_context(_pipe);
   float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;

   memcpy(p, default_outer_level, 4 * sizeof(float));
   memcpy(p + 4, default_inner_level, 2 * sizeof(float));
}

struct tc_patch_vertices {
   struct tc_call_base base;
   ubyte patch_vertices;
};

static uint16_t
tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
{
   uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;

   pipe->set_patch_vertices(pipe, patch_vertices);
   return call_size(tc_patch_vertices);
}

static void
tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_set_patch_vertices,
               tc_patch_vertices)->patch_vertices = patch_vertices;
}

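/* set_constant_buffer uses two call layouts: tc_constant_buffer_base alone
 * when the binding is being cleared, and the full tc_constant_buffer when a
 * buffer (possibly uploaded from a user pointer) is bound.
 */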
1187 struct tc_constant_buffer_base {
1188 struct tc_call_base base;
1189 ubyte shader, index;
1190 bool is_null;
1191 };
1192
1193 struct tc_constant_buffer {
1194 struct tc_constant_buffer_base base;
1195 struct pipe_constant_buffer cb;
1196 };
1197
1198 static uint16_t
tc_call_set_constant_buffer(struct pipe_context * pipe,void * call,uint64_t * last)1199 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1200 {
1201 struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1202
1203 if (unlikely(p->base.is_null)) {
1204 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1205 return call_size(tc_constant_buffer_base);
1206 }
1207
1208 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1209 return call_size(tc_constant_buffer);
1210 }
1211
1212 static void
tc_set_constant_buffer(struct pipe_context * _pipe,enum pipe_shader_type shader,uint index,bool take_ownership,const struct pipe_constant_buffer * cb)1213 tc_set_constant_buffer(struct pipe_context *_pipe,
1214 enum pipe_shader_type shader, uint index,
1215 bool take_ownership,
1216 const struct pipe_constant_buffer *cb)
1217 {
1218 struct threaded_context *tc = threaded_context(_pipe);
1219
1220 if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1221 struct tc_constant_buffer_base *p =
1222 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1223 p->shader = shader;
1224 p->index = index;
1225 p->is_null = true;
1226 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1227 return;
1228 }
1229
1230 struct pipe_resource *buffer;
1231 unsigned offset;
1232
1233 if (cb->user_buffer) {
1234 /* This must be done before adding set_constant_buffer, because it could
1235 * generate e.g. transfer_unmap and flush partially-uninitialized
1236 * set_constant_buffer to the driver if it was done afterwards.
1237 */
1238 buffer = NULL;
1239 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1240 tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1241 u_upload_unmap(tc->base.const_uploader);
1242 take_ownership = true;
1243 } else {
1244 buffer = cb->buffer;
1245 offset = cb->buffer_offset;
1246 }
1247
1248 struct tc_constant_buffer *p =
1249 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1250 p->base.shader = shader;
1251 p->base.index = index;
1252 p->base.is_null = false;
1253 p->cb.user_buffer = NULL;
1254 p->cb.buffer_offset = offset;
1255 p->cb.buffer_size = cb->buffer_size;
1256
1257 if (take_ownership)
1258 p->cb.buffer = buffer;
1259 else
1260 tc_set_resource_reference(&p->cb.buffer, buffer);
1261
1262 if (buffer) {
1263 tc_bind_buffer(tc, &tc->const_buffers[shader][index],
1264 &tc->buffer_lists[tc->next_buf_list], buffer);
1265 } else {
1266 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1267 }
1268 }
1269
1270 struct tc_inlinable_constants {
1271 struct tc_call_base base;
1272 ubyte shader;
1273 ubyte num_values;
1274 uint32_t values[MAX_INLINABLE_UNIFORMS];
1275 };
1276
1277 static uint16_t
tc_call_set_inlinable_constants(struct pipe_context * pipe,void * call,uint64_t * last)1278 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1279 {
1280 struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1281
1282 pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1283 return call_size(tc_inlinable_constants);
1284 }
1285
1286 static void
tc_set_inlinable_constants(struct pipe_context * _pipe,enum pipe_shader_type shader,uint num_values,uint32_t * values)1287 tc_set_inlinable_constants(struct pipe_context *_pipe,
1288 enum pipe_shader_type shader,
1289 uint num_values, uint32_t *values)
1290 {
1291 struct threaded_context *tc = threaded_context(_pipe);
1292 struct tc_inlinable_constants *p =
1293 tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1294 p->shader = shader;
1295 p->num_values = num_values;
1296 memcpy(p->values, values, num_values * 4);
1297 }
1298
1299 struct tc_sample_locations {
1300 struct tc_call_base base;
1301 uint16_t size;
1302 uint8_t slot[0];
1303 };
1304
1305
1306 static uint16_t
tc_call_set_sample_locations(struct pipe_context * pipe,void * call,uint64_t * last)1307 tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1308 {
1309 struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1310
1311 pipe->set_sample_locations(pipe, p->size, p->slot);
1312 return p->base.num_slots;
1313 }
1314
1315 static void
tc_set_sample_locations(struct pipe_context * _pipe,size_t size,const uint8_t * locations)1316 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1317 {
1318 struct threaded_context *tc = threaded_context(_pipe);
1319 struct tc_sample_locations *p =
1320 tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1321 tc_sample_locations, size);
1322
1323 p->size = size;
1324 memcpy(p->slot, locations, size);
1325 }
1326
1327 struct tc_scissors {
1328 struct tc_call_base base;
1329 ubyte start, count;
1330 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1331 };
1332
1333 static uint16_t
tc_call_set_scissor_states(struct pipe_context * pipe,void * call,uint64_t * last)1334 tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1335 {
1336 struct tc_scissors *p = (struct tc_scissors *)call;
1337
1338 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1339 return p->base.num_slots;
1340 }
1341
1342 static void
tc_set_scissor_states(struct pipe_context * _pipe,unsigned start,unsigned count,const struct pipe_scissor_state * states)1343 tc_set_scissor_states(struct pipe_context *_pipe,
1344 unsigned start, unsigned count,
1345 const struct pipe_scissor_state *states)
1346 {
1347 struct threaded_context *tc = threaded_context(_pipe);
1348 struct tc_scissors *p =
1349 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1350
1351 p->start = start;
1352 p->count = count;
1353 memcpy(&p->slot, states, count * sizeof(states[0]));
1354 }
1355
1356 struct tc_viewports {
1357 struct tc_call_base base;
1358 ubyte start, count;
1359 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1360 };
1361
1362 static uint16_t
tc_call_set_viewport_states(struct pipe_context * pipe,void * call,uint64_t * last)1363 tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1364 {
1365 struct tc_viewports *p = (struct tc_viewports *)call;
1366
1367 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1368 return p->base.num_slots;
1369 }
1370
1371 static void
tc_set_viewport_states(struct pipe_context * _pipe,unsigned start,unsigned count,const struct pipe_viewport_state * states)1372 tc_set_viewport_states(struct pipe_context *_pipe,
1373 unsigned start, unsigned count,
1374 const struct pipe_viewport_state *states)
1375 {
1376 if (!count)
1377 return;
1378
1379 struct threaded_context *tc = threaded_context(_pipe);
1380 struct tc_viewports *p =
1381 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1382
1383 p->start = start;
1384 p->count = count;
1385 memcpy(&p->slot, states, count * sizeof(states[0]));
1386 }
1387
1388 struct tc_window_rects {
1389 struct tc_call_base base;
1390 bool include;
1391 ubyte count;
1392 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1393 };
1394
1395 static uint16_t
tc_call_set_window_rectangles(struct pipe_context * pipe,void * call,uint64_t * last)1396 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1397 {
1398 struct tc_window_rects *p = (struct tc_window_rects *)call;
1399
1400 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1401 return p->base.num_slots;
1402 }
1403
1404 static void
tc_set_window_rectangles(struct pipe_context * _pipe,bool include,unsigned count,const struct pipe_scissor_state * rects)1405 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1406 unsigned count,
1407 const struct pipe_scissor_state *rects)
1408 {
1409 struct threaded_context *tc = threaded_context(_pipe);
1410 struct tc_window_rects *p =
1411 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1412
1413 p->include = include;
1414 p->count = count;
1415 memcpy(p->slot, rects, count * sizeof(rects[0]));
1416 }
1417
1418 struct tc_sampler_views {
1419 struct tc_call_base base;
1420 ubyte shader, start, count, unbind_num_trailing_slots;
1421 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1422 };
1423
1424 static uint16_t
tc_call_set_sampler_views(struct pipe_context * pipe,void * call,uint64_t * last)1425 tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1426 {
1427 struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1428
1429 pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1430 p->unbind_num_trailing_slots, true, p->slot);
1431 return p->base.num_slots;
1432 }
1433
1434 static void
tc_set_sampler_views(struct pipe_context * _pipe,enum pipe_shader_type shader,unsigned start,unsigned count,unsigned unbind_num_trailing_slots,bool take_ownership,struct pipe_sampler_view ** views)1435 tc_set_sampler_views(struct pipe_context *_pipe,
1436 enum pipe_shader_type shader,
1437 unsigned start, unsigned count,
1438 unsigned unbind_num_trailing_slots, bool take_ownership,
1439 struct pipe_sampler_view **views)
1440 {
1441 if (!count && !unbind_num_trailing_slots)
1442 return;
1443
1444 struct threaded_context *tc = threaded_context(_pipe);
1445 struct tc_sampler_views *p =
1446 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1447 views ? count : 0);
1448
1449 p->shader = shader;
1450 p->start = start;
1451
1452 if (views) {
1453 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1454
1455 p->count = count;
1456 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1457
1458 if (take_ownership) {
1459 memcpy(p->slot, views, sizeof(*views) * count);
1460
1461 for (unsigned i = 0; i < count; i++) {
1462 if (views[i] && views[i]->target == PIPE_BUFFER) {
1463 tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
1464 views[i]->texture);
1465 } else {
1466 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1467 }
1468 }
1469 } else {
1470 for (unsigned i = 0; i < count; i++) {
1471 p->slot[i] = NULL;
1472 pipe_sampler_view_reference(&p->slot[i], views[i]);
1473
1474 if (views[i] && views[i]->target == PIPE_BUFFER) {
1475 tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
1476 views[i]->texture);
1477 } else {
1478 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1479 }
1480 }
1481 }
1482
1483 tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1484 unbind_num_trailing_slots);
1485 tc->seen_sampler_buffers[shader] = true;
1486 } else {
1487 p->count = 0;
1488 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1489
1490 tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1491 count + unbind_num_trailing_slots);
1492 }
1493 }
1494
1495 struct tc_shader_images {
1496 struct tc_call_base base;
1497 ubyte shader, start, count;
1498 ubyte unbind_num_trailing_slots;
1499 struct pipe_image_view slot[0]; /* more will be allocated if needed */
1500 };
1501
1502 static uint16_t
tc_call_set_shader_images(struct pipe_context * pipe,void * call,uint64_t * last)1503 tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1504 {
1505 struct tc_shader_images *p = (struct tc_shader_images *)call;
1506 unsigned count = p->count;
1507
1508 if (!p->count) {
1509 pipe->set_shader_images(pipe, p->shader, p->start, 0,
1510 p->unbind_num_trailing_slots, NULL);
1511 return call_size(tc_shader_images);
1512 }
1513
1514 pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1515 p->unbind_num_trailing_slots, p->slot);
1516
1517 for (unsigned i = 0; i < count; i++)
1518 tc_drop_resource_reference(p->slot[i].resource);
1519
1520 return p->base.num_slots;
1521 }
1522
1523 static void
tc_set_shader_images(struct pipe_context * _pipe,enum pipe_shader_type shader,unsigned start,unsigned count,unsigned unbind_num_trailing_slots,const struct pipe_image_view * images)1524 tc_set_shader_images(struct pipe_context *_pipe,
1525 enum pipe_shader_type shader,
1526 unsigned start, unsigned count,
1527 unsigned unbind_num_trailing_slots,
1528 const struct pipe_image_view *images)
1529 {
1530 if (!count && !unbind_num_trailing_slots)
1531 return;
1532
1533 struct threaded_context *tc = threaded_context(_pipe);
1534 struct tc_shader_images *p =
1535 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1536 images ? count : 0);
1537 unsigned writable_buffers = 0;
1538
1539 p->shader = shader;
1540 p->start = start;
1541
1542 if (images) {
1543 p->count = count;
1544 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1545
1546 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1547
1548 for (unsigned i = 0; i < count; i++) {
1549 struct pipe_resource *resource = images[i].resource;
1550
1551 tc_set_resource_reference(&p->slot[i].resource, resource);
1552
1553 if (resource && resource->target == PIPE_BUFFER) {
1554 tc_bind_buffer(tc, &tc->image_buffers[shader][start + i], next, resource);
1555
1556 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1557 struct threaded_resource *tres = threaded_resource(resource);
1558
1559 tc_buffer_disable_cpu_storage(resource);
1560 util_range_add(&tres->b, &tres->valid_buffer_range,
1561 images[i].u.buf.offset,
1562 images[i].u.buf.offset + images[i].u.buf.size);
1563 writable_buffers |= BITFIELD_BIT(start + i);
1564 }
1565 } else {
1566 tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1567 }
1568 }
1569 memcpy(p->slot, images, count * sizeof(images[0]));
1570
1571 tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1572 unbind_num_trailing_slots);
1573 tc->seen_image_buffers[shader] = true;
1574 } else {
1575 p->count = 0;
1576 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1577
1578 tc_unbind_buffers(&tc->image_buffers[shader][start],
1579 count + unbind_num_trailing_slots);
1580 }
1581
1582 tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1583 tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1584 }
1585
1586 struct tc_shader_buffers {
1587 struct tc_call_base base;
1588 ubyte shader, start, count;
1589 bool unbind;
1590 unsigned writable_bitmask;
1591 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1592 };
1593
1594 static uint16_t
1595 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1596 {
1597 struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1598 unsigned count = p->count;
1599
1600 if (p->unbind) {
1601 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1602 return call_size(tc_shader_buffers);
1603 }
1604
1605 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1606 p->writable_bitmask);
1607
1608 for (unsigned i = 0; i < count; i++)
1609 tc_drop_resource_reference(p->slot[i].buffer);
1610
1611 return p->base.num_slots;
1612 }
1613
1614 static void
1615 tc_set_shader_buffers(struct pipe_context *_pipe,
1616 enum pipe_shader_type shader,
1617 unsigned start, unsigned count,
1618 const struct pipe_shader_buffer *buffers,
1619 unsigned writable_bitmask)
1620 {
1621 if (!count)
1622 return;
1623
1624 struct threaded_context *tc = threaded_context(_pipe);
1625 struct tc_shader_buffers *p =
1626 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1627 buffers ? count : 0);
1628
1629 p->shader = shader;
1630 p->start = start;
1631 p->count = count;
1632 p->unbind = buffers == NULL;
1633 p->writable_bitmask = writable_bitmask;
1634
1635 if (buffers) {
1636 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1637
1638 for (unsigned i = 0; i < count; i++) {
1639 struct pipe_shader_buffer *dst = &p->slot[i];
1640 const struct pipe_shader_buffer *src = buffers + i;
1641
1642 tc_set_resource_reference(&dst->buffer, src->buffer);
1643 dst->buffer_offset = src->buffer_offset;
1644 dst->buffer_size = src->buffer_size;
1645
1646 if (src->buffer) {
1647 struct threaded_resource *tres = threaded_resource(src->buffer);
1648
1649 tc_bind_buffer(tc, &tc->shader_buffers[shader][start + i], next, &tres->b);
1650
1651 if (writable_bitmask & BITFIELD_BIT(i)) {
1652 tc_buffer_disable_cpu_storage(src->buffer);
1653 util_range_add(&tres->b, &tres->valid_buffer_range,
1654 src->buffer_offset,
1655 src->buffer_offset + src->buffer_size);
1656 }
1657 } else {
1658 tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1659 }
1660 }
1661 tc->seen_shader_buffers[shader] = true;
1662 } else {
1663 tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1664 }
1665
1666 tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1667 tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1668 }
1669
1670 struct tc_vertex_buffers {
1671 struct tc_call_base base;
1672 ubyte start, count;
1673 ubyte unbind_num_trailing_slots;
1674 struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1675 };
1676
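/* Execution side of TC_CALL_set_vertex_buffers. The buffers are always passed
 * to the driver with take_ownership = true because the threaded context
 * already holds the references (either taken over from the caller or added in
 * tc_set_vertex_buffers), so nothing has to be unreferenced here.
 */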
1677 static uint16_t
1678 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1679 {
1680 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1681 unsigned count = p->count;
1682
1683 if (!count) {
1684 pipe->set_vertex_buffers(pipe, p->start, 0,
1685 p->unbind_num_trailing_slots, false, NULL);
1686 return call_size(tc_vertex_buffers);
1687 }
1688
1689 for (unsigned i = 0; i < count; i++)
1690 tc_assert(!p->slot[i].is_user_buffer);
1691
1692 pipe->set_vertex_buffers(pipe, p->start, count,
1693 p->unbind_num_trailing_slots, true, p->slot);
1694 return p->base.num_slots;
1695 }
1696
1697 static void
1698 tc_set_vertex_buffers(struct pipe_context *_pipe,
1699 unsigned start, unsigned count,
1700 unsigned unbind_num_trailing_slots,
1701 bool take_ownership,
1702 const struct pipe_vertex_buffer *buffers)
1703 {
1704 struct threaded_context *tc = threaded_context(_pipe);
1705
1706 if (!count && !unbind_num_trailing_slots)
1707 return;
1708
1709 if (count && buffers) {
1710 struct tc_vertex_buffers *p =
1711 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1712 p->start = start;
1713 p->count = count;
1714 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1715
1716 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1717
1718 if (take_ownership) {
1719 memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1720
1721 for (unsigned i = 0; i < count; i++) {
1722 struct pipe_resource *buf = buffers[i].buffer.resource;
1723
1724 if (buf) {
1725 tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1726 } else {
1727 tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1728 }
1729 }
1730 } else {
1731 for (unsigned i = 0; i < count; i++) {
1732 struct pipe_vertex_buffer *dst = &p->slot[i];
1733 const struct pipe_vertex_buffer *src = buffers + i;
1734 struct pipe_resource *buf = src->buffer.resource;
1735
1736 tc_assert(!src->is_user_buffer);
1737 dst->stride = src->stride;
1738 dst->is_user_buffer = false;
1739 tc_set_resource_reference(&dst->buffer.resource, buf);
1740 dst->buffer_offset = src->buffer_offset;
1741
1742 if (buf) {
1743 tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1744 } else {
1745 tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1746 }
1747 }
1748 }
1749
1750 tc_unbind_buffers(&tc->vertex_buffers[start + count],
1751 unbind_num_trailing_slots);
1752 } else {
1753 struct tc_vertex_buffers *p =
1754 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1755 p->start = start;
1756 p->count = 0;
1757 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1758
1759 tc_unbind_buffers(&tc->vertex_buffers[start],
1760 count + unbind_num_trailing_slots);
1761 }
1762 }
1763
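/* Stream-output targets are written by the GPU, so tc_set_stream_output_targets
 * below also adds their backing buffers to the current buffer list and disables
 * any CPU storage they may have, in addition to enqueuing the state change.
 */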
1764 struct tc_stream_outputs {
1765 struct tc_call_base base;
1766 unsigned count;
1767 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1768 unsigned offsets[PIPE_MAX_SO_BUFFERS];
1769 };
1770
1771 static uint16_t
1772 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1773 {
1774 struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1775 unsigned count = p->count;
1776
1777 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1778 for (unsigned i = 0; i < count; i++)
1779 tc_drop_so_target_reference(p->targets[i]);
1780
1781 return call_size(tc_stream_outputs);
1782 }
1783
1784 static void
1785 tc_set_stream_output_targets(struct pipe_context *_pipe,
1786 unsigned count,
1787 struct pipe_stream_output_target **tgs,
1788 const unsigned *offsets)
1789 {
1790 struct threaded_context *tc = threaded_context(_pipe);
1791 struct tc_stream_outputs *p =
1792 tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1793 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1794
1795 for (unsigned i = 0; i < count; i++) {
1796 p->targets[i] = NULL;
1797 pipe_so_target_reference(&p->targets[i], tgs[i]);
1798 if (tgs[i]) {
1799 tc_buffer_disable_cpu_storage(tgs[i]->buffer);
1800 tc_bind_buffer(tc, &tc->streamout_buffers[i], next, tgs[i]->buffer);
1801 } else {
1802 tc_unbind_buffer(&tc->streamout_buffers[i]);
1803 }
1804 }
1805 p->count = count;
1806 memcpy(p->offsets, offsets, count * sizeof(unsigned));
1807
1808 tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1809 if (count)
1810 tc->seen_streamout_buffers = true;
1811 }
1812
1813 static void
1814 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1815 unsigned count, struct pipe_surface **resources)
1816 {
1817 struct threaded_context *tc = threaded_context(_pipe);
1818 struct pipe_context *pipe = tc->pipe;
1819
1820 tc_sync(tc);
1821 pipe->set_compute_resources(pipe, start, count, resources);
1822 }
1823
1824 static void
1825 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1826 unsigned count, struct pipe_resource **resources,
1827 uint32_t **handles)
1828 {
1829 struct threaded_context *tc = threaded_context(_pipe);
1830 struct pipe_context *pipe = tc->pipe;
1831
1832 tc_sync(tc);
1833 pipe->set_global_binding(pipe, first, count, resources, handles);
1834 }
1835
1836
1837 /********************************************************************
1838 * views
1839 */
1840
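/* View and surface creation has to return an object immediately, so these
 * entry points call into the driver directly without synchronizing with the
 * driver thread, presumably relying on these driver hooks being safe to call
 * from the application thread. The returned object's context pointer is
 * redirected to the threaded context so that the matching destroy calls come
 * back through these wrappers as well.
 */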
1841 static struct pipe_surface *
1842 tc_create_surface(struct pipe_context *_pipe,
1843 struct pipe_resource *resource,
1844 const struct pipe_surface *surf_tmpl)
1845 {
1846 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1847 struct pipe_surface *view =
1848 pipe->create_surface(pipe, resource, surf_tmpl);
1849
1850 if (view)
1851 view->context = _pipe;
1852 return view;
1853 }
1854
1855 static void
1856 tc_surface_destroy(struct pipe_context *_pipe,
1857 struct pipe_surface *surf)
1858 {
1859 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1860
1861 pipe->surface_destroy(pipe, surf);
1862 }
1863
1864 static struct pipe_sampler_view *
1865 tc_create_sampler_view(struct pipe_context *_pipe,
1866 struct pipe_resource *resource,
1867 const struct pipe_sampler_view *templ)
1868 {
1869 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1870 struct pipe_sampler_view *view =
1871 pipe->create_sampler_view(pipe, resource, templ);
1872
1873 if (view)
1874 view->context = _pipe;
1875 return view;
1876 }
1877
1878 static void
1879 tc_sampler_view_destroy(struct pipe_context *_pipe,
1880 struct pipe_sampler_view *view)
1881 {
1882 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1883
1884 pipe->sampler_view_destroy(pipe, view);
1885 }
1886
1887 static struct pipe_stream_output_target *
1888 tc_create_stream_output_target(struct pipe_context *_pipe,
1889 struct pipe_resource *res,
1890 unsigned buffer_offset,
1891 unsigned buffer_size)
1892 {
1893 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1894 struct threaded_resource *tres = threaded_resource(res);
1895 struct pipe_stream_output_target *view;
1896
1897 util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1898 buffer_offset + buffer_size);
1899
1900 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1901 buffer_size);
1902 if (view)
1903 view->context = _pipe;
1904 return view;
1905 }
1906
1907 static void
1908 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1909 struct pipe_stream_output_target *target)
1910 {
1911 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1912
1913 pipe->stream_output_target_destroy(pipe, target);
1914 }
1915
1916
1917 /********************************************************************
1918 * bindless
1919 */
1920
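/* Bindless handle creation has to return the handle, so it synchronizes with
 * the driver thread. Making a handle resident returns nothing and is enqueued
 * as a regular call instead.
 */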
1921 static uint64_t
1922 tc_create_texture_handle(struct pipe_context *_pipe,
1923 struct pipe_sampler_view *view,
1924 const struct pipe_sampler_state *state)
1925 {
1926 struct threaded_context *tc = threaded_context(_pipe);
1927 struct pipe_context *pipe = tc->pipe;
1928
1929 tc_sync(tc);
1930 return pipe->create_texture_handle(pipe, view, state);
1931 }
1932
1933 struct tc_make_texture_handle_resident {
1934 struct tc_call_base base;
1935 bool resident;
1936 uint64_t handle;
1937 };
1938
1939 static uint16_t
1940 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1941 {
1942 struct tc_make_texture_handle_resident *p =
1943 to_call(call, tc_make_texture_handle_resident);
1944
1945 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1946 return call_size(tc_make_texture_handle_resident);
1947 }
1948
1949 static void
1950 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1951 bool resident)
1952 {
1953 struct threaded_context *tc = threaded_context(_pipe);
1954 struct tc_make_texture_handle_resident *p =
1955 tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1956 tc_make_texture_handle_resident);
1957
1958 p->handle = handle;
1959 p->resident = resident;
1960 }
1961
1962 static uint64_t
1963 tc_create_image_handle(struct pipe_context *_pipe,
1964 const struct pipe_image_view *image)
1965 {
1966 struct threaded_context *tc = threaded_context(_pipe);
1967 struct pipe_context *pipe = tc->pipe;
1968
1969 if (image->resource->target == PIPE_BUFFER)
1970 tc_buffer_disable_cpu_storage(image->resource);
1971
1972 tc_sync(tc);
1973 return pipe->create_image_handle(pipe, image);
1974 }
1975
1976 struct tc_make_image_handle_resident {
1977 struct tc_call_base base;
1978 bool resident;
1979 unsigned access;
1980 uint64_t handle;
1981 };
1982
1983 static uint16_t
1984 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1985 {
1986 struct tc_make_image_handle_resident *p =
1987 to_call(call, tc_make_image_handle_resident);
1988
1989 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1990 return call_size(tc_make_image_handle_resident);
1991 }
1992
1993 static void
1994 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1995 unsigned access, bool resident)
1996 {
1997 struct threaded_context *tc = threaded_context(_pipe);
1998 struct tc_make_image_handle_resident *p =
1999 tc_add_call(tc, TC_CALL_make_image_handle_resident,
2000 tc_make_image_handle_resident);
2001
2002 p->handle = handle;
2003 p->access = access;
2004 p->resident = resident;
2005 }
2006
2007
2008 /********************************************************************
2009 * transfer
2010 */
2011
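/* Buffer invalidation (see tc_invalidate_buffer below) allocates a brand new
 * backing resource on the application thread and enqueues a
 * TC_CALL_replace_buffer_storage call, which asks the driver to swap the
 * storage of the original pipe_resource for the new one (the rebind
 * information gathered by tc_rebind_buffer is passed along).
 */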
2012 struct tc_replace_buffer_storage {
2013 struct tc_call_base base;
2014 uint16_t num_rebinds;
2015 uint32_t rebind_mask;
2016 uint32_t delete_buffer_id;
2017 struct pipe_resource *dst;
2018 struct pipe_resource *src;
2019 tc_replace_buffer_storage_func func;
2020 };
2021
2022 static uint16_t
2023 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
2024 {
2025 struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2026
2027 p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2028
2029 tc_drop_resource_reference(p->dst);
2030 tc_drop_resource_reference(p->src);
2031 return call_size(tc_replace_buffer_storage);
2032 }
2033
2034 /* Return true if the buffer has been invalidated or is idle.
2035 * Note that callers must've called tc_touch_buffer before calling
2036 * this function. */
2037 static bool
2038 tc_invalidate_buffer(struct threaded_context *tc,
2039 struct threaded_resource *tbuf)
2040 {
2041 if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2042 /* It's idle, so invalidation would be a no-op, but we can still clear
2043 * the valid range because we are technically doing an invalidation and
2044 * merely skipping it because it's useless.
2045 *
2046 * If the buffer is bound for write, we can't invalidate the range.
2047 */
2048 if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2049 util_range_set_empty(&tbuf->valid_buffer_range);
2050 return true;
2051 }
2052
2053 struct pipe_screen *screen = tc->base.screen;
2054 struct pipe_resource *new_buf;
2055
2056 /* Shared, pinned, and sparse buffers can't be reallocated. */
2057 if (tc_is_buffer_shared(tbuf) ||
2058 tbuf->is_user_ptr ||
2059 tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2060 return false;
2061
2062 /* Allocate a new one. */
2063 new_buf = screen->resource_create(screen, &tbuf->b);
2064 if (!new_buf)
2065 return false;
2066
2067 /* Replace the "latest" pointer. */
2068 if (tbuf->latest != &tbuf->b)
2069 pipe_resource_reference(&tbuf->latest, NULL);
2070
2071 tbuf->latest = new_buf;
2072
2073 uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2074
2075 /* Enqueue storage replacement of the original buffer. */
2076 struct tc_replace_buffer_storage *p =
2077 tc_add_call(tc, TC_CALL_replace_buffer_storage,
2078 tc_replace_buffer_storage);
2079
2080 p->func = tc->replace_buffer_storage;
2081 tc_set_resource_reference(&p->dst, &tbuf->b);
2082 tc_set_resource_reference(&p->src, new_buf);
2083 p->delete_buffer_id = delete_buffer_id;
2084 p->rebind_mask = 0;
2085
2086 /* Treat the current buffer as the new buffer. */
2087 bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2088 p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2089 threaded_resource(new_buf)->buffer_id_unique,
2090 &p->rebind_mask);
2091
2092 /* If the buffer is not bound for write, clear the valid range. */
2093 if (!bound_for_write)
2094 util_range_set_empty(&tbuf->valid_buffer_range);
2095
2096 tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2097 threaded_resource(new_buf)->buffer_id_unique = 0;
2098
2099 return true;
2100 }
2101
2102 /* Note that callers must've called tc_touch_buffer first before
2103 * calling tc_improve_map_buffer_flags. */
2104 static unsigned
2105 tc_improve_map_buffer_flags(struct threaded_context *tc,
2106 struct threaded_resource *tres, unsigned usage,
2107 unsigned offset, unsigned size)
2108 {
2109 /* Never invalidate inside the driver and never infer "unsynchronized". */
2110 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2111 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2112
2113 /* Prevent a reentry. */
2114 if (usage & tc_flags)
2115 return usage;
2116
2117 /* Use the staging upload if it's preferred. */
2118 if (usage & (PIPE_MAP_DISCARD_RANGE |
2119 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2120 !(usage & PIPE_MAP_PERSISTENT) &&
2121 tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2122 tc->use_forced_staging_uploads) {
2123 usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2124 PIPE_MAP_UNSYNCHRONIZED);
2125
2126 return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2127 }
2128
2129 /* Sparse buffers can't be mapped directly and can't be reallocated
2130 * (fully invalidated). That may just be a radeonsi limitation, but
2131 * the threaded context must obey it with radeonsi.
2132 */
2133 if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2134 /* We can use DISCARD_RANGE instead of full discard. This is the only
2135 * fast path for sparse buffers that doesn't need thread synchronization.
2136 */
2137 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2138 usage |= PIPE_MAP_DISCARD_RANGE;
2139
2140 * Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2141 * The threaded context doesn't do unsynchronized mappings and
2142 * invalidations of sparse buffers, therefore correct driver behavior won't
2143 * result in incorrect behavior with the threaded context.
2144 */
2145 return usage;
2146 }
2147
2148 usage |= tc_flags;
2149
2150 /* Handle CPU reads trivially. */
2151 if (usage & PIPE_MAP_READ) {
2152 if (usage & PIPE_MAP_UNSYNCHRONIZED)
2153 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2154
2155 /* Drivers aren't allowed to do buffer invalidations. */
2156 return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2157 }
2158
2159 /* See if the buffer range being mapped has never been initialized or
2160 * the buffer is idle, in which case it can be mapped unsynchronized. */
2161 if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2162 ((!tres->is_shared &&
2163 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2164 !tc_is_buffer_busy(tc, tres, usage)))
2165 usage |= PIPE_MAP_UNSYNCHRONIZED;
2166
2167 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2168 /* If discarding the entire range, discard the whole resource instead. */
2169 if (usage & PIPE_MAP_DISCARD_RANGE &&
2170 offset == 0 && size == tres->b.width0)
2171 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2172
2173 /* Discard the whole resource if needed. */
2174 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2175 if (tc_invalidate_buffer(tc, tres))
2176 usage |= PIPE_MAP_UNSYNCHRONIZED;
2177 else
2178 usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2179 }
2180 }
2181
2182 /* We won't need this flag anymore. */
2183 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2184 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2185
2186 /* GL_AMD_pinned_memory and persistent mappings can't use staging
2187 * buffers. */
2188 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2189 PIPE_MAP_PERSISTENT) ||
2190 tres->is_user_ptr)
2191 usage &= ~PIPE_MAP_DISCARD_RANGE;
2192
2193 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2194 if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2195 usage &= ~PIPE_MAP_DISCARD_RANGE;
2196 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2197 }
2198
2199 return usage;
2200 }
2201
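/* tc_buffer_map picks the fastest mapping strategy available, roughly in this
 * order:
 *  1) return the malloc'd CPU storage directly when it's enabled for the
 *     buffer,
 *  2) for DISCARD_RANGE mappings, return a staging allocation from the stream
 *     uploader and copy it into place at flush/unmap time,
 *  3) otherwise map through the driver, synchronizing with the driver thread
 *     unless the mapping was determined to be unsynchronized.
 */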
2202 static void *
2203 tc_buffer_map(struct pipe_context *_pipe,
2204 struct pipe_resource *resource, unsigned level,
2205 unsigned usage, const struct pipe_box *box,
2206 struct pipe_transfer **transfer)
2207 {
2208 struct threaded_context *tc = threaded_context(_pipe);
2209 struct threaded_resource *tres = threaded_resource(resource);
2210 struct pipe_context *pipe = tc->pipe;
2211
2212 /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage,
2213 * and this shouldn't normally be necessary because glthread only uses large buffers.
2214 */
2215 if (usage & PIPE_MAP_THREAD_SAFE)
2216 tc_buffer_disable_cpu_storage(resource);
2217
2218 tc_touch_buffer(tc, tres);
2219
2220 /* CPU storage relies on buffer invalidation never failing. With shared buffers,
2221 * invalidation might not always be possible, so CPU storage can't be used.
2222 */
2223 if (tc_is_buffer_shared(tres))
2224 tc_buffer_disable_cpu_storage(resource);
2225
2226 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2227
2228 /* If the CPU storage is enabled, return it directly. */
2229 if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2230 /* We can't let resource_copy_region disable the CPU storage. */
2231 assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2232
2233 if (!tres->cpu_storage) {
2234 tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2235
2236 if (tres->cpu_storage && tres->valid_buffer_range.end) {
2237 /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2238 struct pipe_box box2;
2239 struct pipe_transfer *transfer2;
2240
2241 unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2242 u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2243
2244 tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2245 tc_set_driver_thread(tc);
2246
2247 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2248 0, PIPE_MAP_READ, &box2, &transfer2);
2249 memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2250 ret,
2251 valid_range_len);
2252 pipe->buffer_unmap(pipe, transfer2);
2253
2254 tc_clear_driver_thread(tc);
2255 }
2256 }
2257
2258 if (tres->cpu_storage) {
2259 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2260 ttrans->b.resource = resource;
2261 ttrans->b.usage = usage;
2262 ttrans->b.box = *box;
2263 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2264 ttrans->cpu_storage_mapped = true;
2265 *transfer = &ttrans->b;
2266
2267 return (uint8_t*)tres->cpu_storage + box->x;
2268 } else {
2269 tres->allow_cpu_storage = false;
2270 }
2271 }
2272
2273 /* Do a staging transfer within the threaded context. The driver should
2274 * only get resource_copy_region.
2275 */
2276 if (usage & PIPE_MAP_DISCARD_RANGE) {
2277 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2278 uint8_t *map;
2279
2280 u_upload_alloc(tc->base.stream_uploader, 0,
2281 box->width + (box->x % tc->map_buffer_alignment),
2282 tc->map_buffer_alignment, &ttrans->b.offset,
2283 &ttrans->staging, (void**)&map);
2284 if (!map) {
2285 slab_free(&tc->pool_transfers, ttrans);
2286 return NULL;
2287 }
2288
2289 ttrans->b.resource = resource;
2290 ttrans->b.level = 0;
2291 ttrans->b.usage = usage;
2292 ttrans->b.box = *box;
2293 ttrans->b.stride = 0;
2294 ttrans->b.layer_stride = 0;
2295 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2296 ttrans->cpu_storage_mapped = false;
2297 *transfer = &ttrans->b;
2298
2299 p_atomic_inc(&tres->pending_staging_uploads);
2300 util_range_add(resource, &tres->pending_staging_uploads_range,
2301 box->x, box->x + box->width);
2302
2303 return map + (box->x % tc->map_buffer_alignment);
2304 }
2305
2306 if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2307 p_atomic_read(&tres->pending_staging_uploads) &&
2308 util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2309 /* Write conflict detected between a staging transfer and the direct mapping we're
2310 * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2311 * will have to wait for the staging transfer completion.
2312 * Note: The conflict detection is only based on the mapped range, not on the actual
2313 * written range(s).
2314 */
2315 usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2316 tc->use_forced_staging_uploads = false;
2317 }
2318
2319 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2320 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2321 tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
2322 usage & PIPE_MAP_READ ? " read" : " staging conflict");
2323 tc_set_driver_thread(tc);
2324 }
2325
2326 tc->bytes_mapped_estimate += box->width;
2327
2328 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2329 level, usage, box, transfer);
2330 threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2331 threaded_transfer(*transfer)->cpu_storage_mapped = false;
2332
2333 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2334 tc_clear_driver_thread(tc);
2335
2336 return ret;
2337 }
2338
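/* Unlike buffer maps, texture maps always synchronize with the driver thread;
 * only buffers get the unsynchronized and staging fast paths above.
 */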
2339 static void *
2340 tc_texture_map(struct pipe_context *_pipe,
2341 struct pipe_resource *resource, unsigned level,
2342 unsigned usage, const struct pipe_box *box,
2343 struct pipe_transfer **transfer)
2344 {
2345 struct threaded_context *tc = threaded_context(_pipe);
2346 struct threaded_resource *tres = threaded_resource(resource);
2347 struct pipe_context *pipe = tc->pipe;
2348
2349 tc_sync_msg(tc, "texture");
2350 tc_set_driver_thread(tc);
2351
2352 tc->bytes_mapped_estimate += box->width;
2353
2354 void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2355 level, usage, box, transfer);
2356
2357 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2358 tc_clear_driver_thread(tc);
2359
2360 return ret;
2361 }
2362
2363 struct tc_transfer_flush_region {
2364 struct tc_call_base base;
2365 struct pipe_box box;
2366 struct pipe_transfer *transfer;
2367 };
2368
2369 static uint16_t
2370 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2371 {
2372 struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2373
2374 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2375 return call_size(tc_transfer_flush_region);
2376 }
2377
2378 struct tc_resource_copy_region {
2379 struct tc_call_base base;
2380 unsigned dst_level;
2381 unsigned dstx, dsty, dstz;
2382 unsigned src_level;
2383 struct pipe_box src_box;
2384 struct pipe_resource *dst;
2385 struct pipe_resource *src;
2386 };
2387
2388 static void
2389 tc_resource_copy_region(struct pipe_context *_pipe,
2390 struct pipe_resource *dst, unsigned dst_level,
2391 unsigned dstx, unsigned dsty, unsigned dstz,
2392 struct pipe_resource *src, unsigned src_level,
2393 const struct pipe_box *src_box);
2394
2395 static void
2396 tc_buffer_do_flush_region(struct threaded_context *tc,
2397 struct threaded_transfer *ttrans,
2398 const struct pipe_box *box)
2399 {
2400 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2401
2402 if (ttrans->staging) {
2403 struct pipe_box src_box;
2404
2405 u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2406 (box->x - ttrans->b.box.x),
2407 box->width, &src_box);
2408
2409 /* Copy the staging buffer into the original one. */
2410 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2411 ttrans->staging, 0, &src_box);
2412 }
2413
2414 /* Don't update the valid range when we're uploading the CPU storage
2415 * because it includes the uninitialized range too.
2416 */
2417 if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2418 util_range_add(&tres->b, ttrans->valid_buffer_range,
2419 box->x, box->x + box->width);
2420 }
2421 }
2422
2423 static void
2424 tc_transfer_flush_region(struct pipe_context *_pipe,
2425 struct pipe_transfer *transfer,
2426 const struct pipe_box *rel_box)
2427 {
2428 struct threaded_context *tc = threaded_context(_pipe);
2429 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2430 struct threaded_resource *tres = threaded_resource(transfer->resource);
2431 unsigned required_usage = PIPE_MAP_WRITE |
2432 PIPE_MAP_FLUSH_EXPLICIT;
2433
2434 if (tres->b.target == PIPE_BUFFER) {
2435 if ((transfer->usage & required_usage) == required_usage) {
2436 struct pipe_box box;
2437
2438 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2439 tc_buffer_do_flush_region(tc, ttrans, &box);
2440 }
2441
2442 /* Staging transfers don't send the call to the driver.
2443 *
2444 * Transfers using the CPU storage shouldn't call transfer_flush_region
2445 * in the driver because the buffer is not really mapped on the driver
2446 * side and the CPU storage always re-uploads everything (flush_region
2447 * makes no difference).
2448 */
2449 if (ttrans->staging || ttrans->cpu_storage_mapped)
2450 return;
2451 }
2452
2453 struct tc_transfer_flush_region *p =
2454 tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2455 p->transfer = transfer;
2456 p->box = *rel_box;
2457 }
2458
2459 static void
2460 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2461 unsigned flags);
2462
2463 struct tc_buffer_unmap {
2464 struct tc_call_base base;
2465 bool was_staging_transfer;
2466 union {
2467 struct pipe_transfer *transfer;
2468 struct pipe_resource *resource;
2469 };
2470 };
2471
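/* Execution side of TC_CALL_buffer_unmap. For staging transfers there is no
 * driver-side mapping to unmap; the call only decrements the resource's
 * pending_staging_uploads counter and drops the reference added in
 * tc_buffer_unmap.
 */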
2472 static uint16_t
2473 tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2474 {
2475 struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2476
2477 if (p->was_staging_transfer) {
2478 struct threaded_resource *tres = threaded_resource(p->resource);
2479 /* Nothing to do except keeping track of staging uploads */
2480 assert(tres->pending_staging_uploads > 0);
2481 p_atomic_dec(&tres->pending_staging_uploads);
2482 tc_drop_resource_reference(p->resource);
2483 } else {
2484 pipe->buffer_unmap(pipe, p->transfer);
2485 }
2486
2487 return call_size(tc_buffer_unmap);
2488 }
2489
2490 static void
2491 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2492 {
2493 struct threaded_context *tc = threaded_context(_pipe);
2494 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2495 struct threaded_resource *tres = threaded_resource(transfer->resource);
2496
2497 /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2498 * called from any thread and bypasses all multithreaded queues.
2499 */
2500 if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2501 assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2502 assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2503 PIPE_MAP_DISCARD_RANGE)));
2504
2505 struct pipe_context *pipe = tc->pipe;
2506 util_range_add(&tres->b, ttrans->valid_buffer_range,
2507 transfer->box.x, transfer->box.x + transfer->box.width);
2508
2509 pipe->buffer_unmap(pipe, transfer);
2510 return;
2511 }
2512
2513 if (transfer->usage & PIPE_MAP_WRITE &&
2514 !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2515 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2516
2517 if (ttrans->cpu_storage_mapped) {
2518 /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2519 * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2520 * If that happens, we just ignore the unmap call and don't upload anything to prevent
2521 * a crash.
2522 *
2523 * Disallow the CPU storage in the driver to work around this.
2524 */
2525 assert(tres->cpu_storage);
2526
2527 if (tres->cpu_storage) {
2528 /* Invalidations shouldn't fail as long as CPU storage is allowed. */
2529 ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres);
2530 assert(invalidated);
2531
2532 tc_buffer_subdata(&tc->base, &tres->b,
2533 PIPE_MAP_UNSYNCHRONIZED |
2534 TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2535 0, tres->b.width0, tres->cpu_storage);
2536 /* This shouldn't have been freed by buffer_subdata. */
2537 assert(tres->cpu_storage);
2538 } else {
2539 static bool warned_once = false;
2540 if (!warned_once) {
2541 fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2542 fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2543 warned_once = true;
2544 }
2545 }
2546
2547 tc_drop_resource_reference(ttrans->staging);
2548 slab_free(&tc->pool_transfers, ttrans);
2549 return;
2550 }
2551
2552 bool was_staging_transfer = false;
2553
2554 if (ttrans->staging) {
2555 was_staging_transfer = true;
2556
2557 tc_drop_resource_reference(ttrans->staging);
2558 slab_free(&tc->pool_transfers, ttrans);
2559 }
2560
2561 struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2562 tc_buffer_unmap);
2563 if (was_staging_transfer) {
2564 tc_set_resource_reference(&p->resource, &tres->b);
2565 p->was_staging_transfer = true;
2566 } else {
2567 p->transfer = transfer;
2568 p->was_staging_transfer = false;
2569 }
2570
2571 /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
2572 * defers the unmap operation to the batch execution.
2573 * bytes_mapped_estimate is an estimate of the map/unmap bytes delta;
2574 * if it goes over an optional limit, the current batch is flushed
2575 * to reclaim some RAM. */
2576 if (!ttrans->staging && tc->bytes_mapped_limit &&
2577 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2578 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2579 }
2580 }
2581
2582 struct tc_texture_unmap {
2583 struct tc_call_base base;
2584 struct pipe_transfer *transfer;
2585 };
2586
2587 static uint16_t
2588 tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2589 {
2590 struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2591
2592 pipe->texture_unmap(pipe, p->transfer);
2593 return call_size(tc_texture_unmap);
2594 }
2595
2596 static void
2597 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2598 {
2599 struct threaded_context *tc = threaded_context(_pipe);
2600 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2601
2602 tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2603
2604 /* tc_texture_map directly maps the textures, but tc_texture_unmap
2605 * defers the unmap operation to the batch execution.
2606 * bytes_mapped_estimate is an estimate of the map/unmap bytes delta;
2607 * if it goes over an optional limit, the current batch is flushed
2608 * to reclaim some RAM. */
2609 if (!ttrans->staging && tc->bytes_mapped_limit &&
2610 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2611 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2612 }
2613 }
2614
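/* Small buffer uploads are copied straight into the batch (up to
 * TC_MAX_SUBDATA_BYTES) and replayed with pipe->buffer_subdata on the driver
 * thread. Unsynchronized, whole-resource-discard, and large uploads fall back
 * to tc_buffer_map/unmap instead, as do buffers that currently have a CPU
 * storage copy.
 */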
2615 struct tc_buffer_subdata {
2616 struct tc_call_base base;
2617 unsigned usage, offset, size;
2618 struct pipe_resource *resource;
2619 char slot[0]; /* more will be allocated if needed */
2620 };
2621
2622 static uint16_t
2623 tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2624 {
2625 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2626
2627 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2628 p->slot);
2629 tc_drop_resource_reference(p->resource);
2630 return p->base.num_slots;
2631 }
2632
2633 static void
2634 tc_buffer_subdata(struct pipe_context *_pipe,
2635 struct pipe_resource *resource,
2636 unsigned usage, unsigned offset,
2637 unsigned size, const void *data)
2638 {
2639 struct threaded_context *tc = threaded_context(_pipe);
2640 struct threaded_resource *tres = threaded_resource(resource);
2641
2642 if (!size)
2643 return;
2644
2645 tc_touch_buffer(tc, tres);
2646
2647 usage |= PIPE_MAP_WRITE;
2648
2649 /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2650 if (!(usage & PIPE_MAP_DIRECTLY))
2651 usage |= PIPE_MAP_DISCARD_RANGE;
2652
2653 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2654
2655 /* Unsynchronized and big transfers should use transfer_map. Also handle
2656 * full invalidations, because drivers aren't allowed to do them.
2657 */
2658 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2659 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2660 size > TC_MAX_SUBDATA_BYTES ||
2661 tres->cpu_storage) {
2662 struct pipe_transfer *transfer;
2663 struct pipe_box box;
2664 uint8_t *map = NULL;
2665
2666 u_box_1d(offset, size, &box);
2667
2668 /* CPU storage is only useful for partial updates. It can add overhead
2669 * on glBufferData calls so avoid using it.
2670 */
2671 if (!tres->cpu_storage && offset == 0 && size == resource->width0)
2672 usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
2673
2674 map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2675 if (map) {
2676 memcpy(map, data, size);
2677 tc_buffer_unmap(_pipe, transfer);
2678 }
2679 return;
2680 }
2681
2682 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2683
2684 /* The upload is small. Enqueue it. */
2685 struct tc_buffer_subdata *p =
2686 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2687
2688 tc_set_resource_reference(&p->resource, resource);
2689 /* This will always be busy because if it wasn't,
2690 * tc_improve_map_buffer_flags would set UNSYNCHRONIZED and we wouldn't get here.
2691 */
2692 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
2693 p->usage = usage;
2694 p->offset = offset;
2695 p->size = size;
2696 memcpy(p->slot, data, size);
2697 }
2698
2699 struct tc_texture_subdata {
2700 struct tc_call_base base;
2701 unsigned level, usage, stride, layer_stride;
2702 struct pipe_box box;
2703 struct pipe_resource *resource;
2704 char slot[0]; /* more will be allocated if needed */
2705 };
2706
2707 static uint16_t
2708 tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2709 {
2710 struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2711
2712 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2713 p->slot, p->stride, p->layer_stride);
2714 tc_drop_resource_reference(p->resource);
2715 return p->base.num_slots;
2716 }
2717
2718 static void
2719 tc_texture_subdata(struct pipe_context *_pipe,
2720 struct pipe_resource *resource,
2721 unsigned level, unsigned usage,
2722 const struct pipe_box *box,
2723 const void *data, unsigned stride,
2724 unsigned layer_stride)
2725 {
2726 struct threaded_context *tc = threaded_context(_pipe);
2727 unsigned size;
2728
2729 assert(box->height >= 1);
2730 assert(box->depth >= 1);
2731
2732 size = (box->depth - 1) * layer_stride +
2733 (box->height - 1) * stride +
2734 box->width * util_format_get_blocksize(resource->format);
2735 if (!size)
2736 return;
2737
2738 /* Small uploads can be enqueued, big uploads must sync. */
2739 if (size <= TC_MAX_SUBDATA_BYTES) {
2740 struct tc_texture_subdata *p =
2741 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2742
2743 tc_set_resource_reference(&p->resource, resource);
2744 p->level = level;
2745 p->usage = usage;
2746 p->box = *box;
2747 p->stride = stride;
2748 p->layer_stride = layer_stride;
2749 memcpy(p->slot, data, size);
2750 } else {
2751 struct pipe_context *pipe = tc->pipe;
2752
2753 tc_sync(tc);
2754 tc_set_driver_thread(tc);
2755 pipe->texture_subdata(pipe, resource, level, usage, box, data,
2756 stride, layer_stride);
2757 tc_clear_driver_thread(tc);
2758 }
2759 }
2760
2761
2762 /********************************************************************
2763 * miscellaneous
2764 */
2765
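/* TC_FUNC_SYNC_RET0 generates a wrapper that synchronizes with the driver
 * thread and then forwards a parameterless, value-returning context function
 * to the driver.
 */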
2766 #define TC_FUNC_SYNC_RET0(ret_type, func) \
2767 static ret_type \
2768 tc_##func(struct pipe_context *_pipe) \
2769 { \
2770 struct threaded_context *tc = threaded_context(_pipe); \
2771 struct pipe_context *pipe = tc->pipe; \
2772 tc_sync(tc); \
2773 return pipe->func(pipe); \
2774 }
2775
2776 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2777
2778 static void
2779 tc_get_sample_position(struct pipe_context *_pipe,
2780 unsigned sample_count, unsigned sample_index,
2781 float *out_value)
2782 {
2783 struct threaded_context *tc = threaded_context(_pipe);
2784 struct pipe_context *pipe = tc->pipe;
2785
2786 tc_sync(tc);
2787 pipe->get_sample_position(pipe, sample_count, sample_index,
2788 out_value);
2789 }
2790
2791 static enum pipe_reset_status
2792 tc_get_device_reset_status(struct pipe_context *_pipe)
2793 {
2794 struct threaded_context *tc = threaded_context(_pipe);
2795 struct pipe_context *pipe = tc->pipe;
2796
2797 if (!tc->options.unsynchronized_get_device_reset_status)
2798 tc_sync(tc);
2799
2800 return pipe->get_device_reset_status(pipe);
2801 }
2802
2803 static void
2804 tc_set_device_reset_callback(struct pipe_context *_pipe,
2805 const struct pipe_device_reset_callback *cb)
2806 {
2807 struct threaded_context *tc = threaded_context(_pipe);
2808 struct pipe_context *pipe = tc->pipe;
2809
2810 tc_sync(tc);
2811 pipe->set_device_reset_callback(pipe, cb);
2812 }
2813
2814 struct tc_string_marker {
2815 struct tc_call_base base;
2816 int len;
2817 char slot[0]; /* more will be allocated if needed */
2818 };
2819
2820 static uint16_t
2821 tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2822 {
2823 struct tc_string_marker *p = (struct tc_string_marker *)call;
2824 pipe->emit_string_marker(pipe, p->slot, p->len);
2825 return p->base.num_slots;
2826 }
2827
2828 static void
2829 tc_emit_string_marker(struct pipe_context *_pipe,
2830 const char *string, int len)
2831 {
2832 struct threaded_context *tc = threaded_context(_pipe);
2833
2834 if (len <= TC_MAX_STRING_MARKER_BYTES) {
2835 struct tc_string_marker *p =
2836 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2837
2838 memcpy(p->slot, string, len);
2839 p->len = len;
2840 } else {
2841 struct pipe_context *pipe = tc->pipe;
2842
2843 tc_sync(tc);
2844 tc_set_driver_thread(tc);
2845 pipe->emit_string_marker(pipe, string, len);
2846 tc_clear_driver_thread(tc);
2847 }
2848 }
2849
2850 static void
2851 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2852 unsigned flags)
2853 {
2854 struct threaded_context *tc = threaded_context(_pipe);
2855 struct pipe_context *pipe = tc->pipe;
2856
2857 tc_sync(tc);
2858 pipe->dump_debug_state(pipe, stream, flags);
2859 }
2860
2861 static void
2862 tc_set_debug_callback(struct pipe_context *_pipe,
2863 const struct util_debug_callback *cb)
2864 {
2865 struct threaded_context *tc = threaded_context(_pipe);
2866 struct pipe_context *pipe = tc->pipe;
2867
2868 tc_sync(tc);
2869
2870 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2871 * with this. shader-db will use an environment variable to disable
2872 * the threaded context.
2873 */
2874 if (cb && !cb->async)
2875 pipe->set_debug_callback(pipe, NULL);
2876 else
2877 pipe->set_debug_callback(pipe, cb);
2878 }
2879
2880 static void
2881 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2882 {
2883 struct threaded_context *tc = threaded_context(_pipe);
2884 struct pipe_context *pipe = tc->pipe;
2885
2886 tc_sync(tc);
2887 pipe->set_log_context(pipe, log);
2888 }
2889
2890 static void
2891 tc_create_fence_fd(struct pipe_context *_pipe,
2892 struct pipe_fence_handle **fence, int fd,
2893 enum pipe_fd_type type)
2894 {
2895 struct threaded_context *tc = threaded_context(_pipe);
2896 struct pipe_context *pipe = tc->pipe;
2897
2898 tc_sync(tc);
2899 pipe->create_fence_fd(pipe, fence, fd, type);
2900 }
2901
2902 struct tc_fence_call {
2903 struct tc_call_base base;
2904 struct pipe_fence_handle *fence;
2905 };
2906
2907 static uint16_t
2908 tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2909 {
2910 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2911
2912 pipe->fence_server_sync(pipe, fence);
2913 pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2914 return call_size(tc_fence_call);
2915 }
2916
2917 static void
2918 tc_fence_server_sync(struct pipe_context *_pipe,
2919 struct pipe_fence_handle *fence)
2920 {
2921 struct threaded_context *tc = threaded_context(_pipe);
2922 struct pipe_screen *screen = tc->pipe->screen;
2923 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2924 tc_fence_call);
2925
2926 call->fence = NULL;
2927 screen->fence_reference(screen, &call->fence, fence);
2928 }
2929
2930 static void
2931 tc_fence_server_signal(struct pipe_context *_pipe,
2932 struct pipe_fence_handle *fence)
2933 {
2934 struct threaded_context *tc = threaded_context(_pipe);
2935 struct pipe_context *pipe = tc->pipe;
2936 tc_sync(tc);
2937 pipe->fence_server_signal(pipe, fence);
2938 }
2939
2940 static struct pipe_video_codec *
2941 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2942 UNUSED const struct pipe_video_codec *templ)
2943 {
2944 unreachable("Threaded context should not be enabled for video APIs");
2945 return NULL;
2946 }
2947
2948 static struct pipe_video_buffer *
2949 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2950 UNUSED const struct pipe_video_buffer *templ)
2951 {
2952 unreachable("Threaded context should not be enabled for video APIs");
2953 return NULL;
2954 }
2955
2956 struct tc_context_param {
2957 struct tc_call_base base;
2958 enum pipe_context_param param;
2959 unsigned value;
2960 };
2961
2962 static uint16_t
2963 tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2964 {
2965 struct tc_context_param *p = to_call(call, tc_context_param);
2966
2967 if (pipe->set_context_param)
2968 pipe->set_context_param(pipe, p->param, p->value);
2969
2970 return call_size(tc_context_param);
2971 }
2972
2973 static void
2974 tc_set_context_param(struct pipe_context *_pipe,
2975 enum pipe_context_param param,
2976 unsigned value)
2977 {
2978 struct threaded_context *tc = threaded_context(_pipe);
2979
2980 if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2981 /* Pin the gallium thread as requested. */
2982 util_set_thread_affinity(tc->queue.threads[0],
2983 util_get_cpu_caps()->L3_affinity_mask[value],
2984 NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2985
2986 /* Execute this immediately (without enqueuing).
2987 * It's required to be thread-safe.
2988 */
2989 struct pipe_context *pipe = tc->pipe;
2990 if (pipe->set_context_param)
2991 pipe->set_context_param(pipe, param, value);
2992 return;
2993 }
2994
2995 if (tc->pipe->set_context_param) {
2996 struct tc_context_param *call =
2997 tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2998
2999 call->param = param;
3000 call->value = value;
3001 }
3002 }
3003
3004
3005 /********************************************************************
3006 * draw, launch, clear, blit, copy, flush
3007 */
3008
3009 struct tc_flush_call {
3010 struct tc_call_base base;
3011 unsigned flags;
3012 struct threaded_context *tc;
3013 struct pipe_fence_handle *fence;
3014 };
3015
3016 static void
3017 tc_flush_queries(struct threaded_context *tc)
3018 {
3019 struct threaded_query *tq, *tmp;
3020 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3021 list_del(&tq->head_unflushed);
3022
3023 /* Memory release semantics: due to a possible race with
3024 * tc_get_query_result, we must ensure that the linked list changes
3025 * are visible before setting tq->flushed.
3026 */
3027 p_atomic_set(&tq->flushed, true);
3028 }
3029 }
3030
3031 static uint16_t
3032 tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
3033 {
3034 struct tc_flush_call *p = to_call(call, tc_flush_call);
3035 struct pipe_screen *screen = pipe->screen;
3036
3037 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3038 screen->fence_reference(screen, &p->fence, NULL);
3039
3040 if (!(p->flags & PIPE_FLUSH_DEFERRED))
3041 tc_flush_queries(p->tc);
3042
3043 return call_size(tc_flush_call);
3044 }
3045
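/* If the driver provided a create_fence callback, deferred/async flushes can
 * be enqueued: the fence is created up front from the current batch's token
 * and the actual pipe->flush runs later on the driver thread. Without that
 * callback, or for synchronous flushes, tc_flush falls back to syncing and
 * flushing directly.
 */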
3046 static void
3047 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3048 unsigned flags)
3049 {
3050 struct threaded_context *tc = threaded_context(_pipe);
3051 struct pipe_context *pipe = tc->pipe;
3052 struct pipe_screen *screen = pipe->screen;
3053 bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3054
3055 if (async && tc->options.create_fence) {
3056 if (fence) {
3057 struct tc_batch *next = &tc->batch_slots[tc->next];
3058
3059 if (!next->token) {
3060 next->token = malloc(sizeof(*next->token));
3061 if (!next->token)
3062 goto out_of_memory;
3063
3064 pipe_reference_init(&next->token->ref, 1);
3065 next->token->tc = tc;
3066 }
3067
3068 screen->fence_reference(screen, fence,
3069 tc->options.create_fence(pipe, next->token));
3070 if (!*fence)
3071 goto out_of_memory;
3072 }
3073
3074 struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3075 p->tc = tc;
3076 p->fence = fence ? *fence : NULL;
3077 p->flags = flags | TC_FLUSH_ASYNC;
3078
3079 if (!(flags & PIPE_FLUSH_DEFERRED))
3080 tc_batch_flush(tc);
3081 return;
3082 }
3083
3084 out_of_memory:
3085 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3086 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3087
3088 if (!(flags & PIPE_FLUSH_DEFERRED))
3089 tc_flush_queries(tc);
3090 tc_set_driver_thread(tc);
3091 pipe->flush(pipe, fence, flags);
3092 tc_clear_driver_thread(tc);
3093 }
3094
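/* For single draws, pipe_draw_info::min_index/max_index are repurposed to hold
 * the draw's start/count (see the comments in the call handlers below), which
 * avoids storing a separate pipe_draw_start_count_bias per call and lets
 * tc_call_draw_single merge consecutive compatible draws into one multi-draw.
 */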
3095 struct tc_draw_single {
3096 struct tc_call_base base;
3097 unsigned index_bias;
3098 struct pipe_draw_info info;
3099 };
3100
3101 struct tc_draw_single_drawid {
3102 struct tc_draw_single base;
3103 unsigned drawid_offset;
3104 };
3105
3106 static uint16_t
3107 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
3108 {
3109 struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3110 struct tc_draw_single *info = &info_drawid->base;
3111
3112 /* u_threaded_context stores start/count in min/max_index for single draws. */
3113 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3114 struct pipe_draw_start_count_bias draw;
3115
3116 draw.start = info->info.min_index;
3117 draw.count = info->info.max_index;
3118 draw.index_bias = info->index_bias;
3119
3120 info->info.index_bounds_valid = false;
3121 info->info.has_user_indices = false;
3122 info->info.take_index_buffer_ownership = false;
3123
3124 pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3125 if (info->info.index_size)
3126 tc_drop_resource_reference(info->info.index.resource);
3127
3128 return call_size(tc_draw_single_drawid);
3129 }
3130
3131 static void
3132 simplify_draw_info(struct pipe_draw_info *info)
3133 {
3134 /* Clear these fields to facilitate draw merging.
3135 * Drivers shouldn't use them.
3136 */
3137 info->has_user_indices = false;
3138 info->index_bounds_valid = false;
3139 info->take_index_buffer_ownership = false;
3140 info->index_bias_varies = false;
3141 info->_pad = 0;
3142
3143 /* This shouldn't be set when merging single draws. */
3144 info->increment_draw_id = false;
3145
3146 if (info->index_size) {
3147 if (!info->primitive_restart)
3148 info->restart_index = 0;
3149 } else {
3150 assert(!info->primitive_restart);
3151 info->primitive_restart = false;
3152 info->restart_index = 0;
3153 info->index.resource = NULL;
3154 }
3155 }
3156
3157 static bool
3158 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3159 struct tc_draw_single *next)
3160 {
3161 if (next->base.call_id != TC_CALL_draw_single)
3162 return false;
3163
3164 simplify_draw_info(&next->info);
3165
3166 STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3167 sizeof(struct pipe_draw_info) - 8);
3168 STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3169 sizeof(struct pipe_draw_info) - 4);
3170 /* All fields must be the same except start and count. */
3171 /* u_threaded_context stores start/count in min/max_index for single draws. */
3172 return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3173 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3174 }
3175
3176 static uint16_t
3177 tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3178 {
3179 /* Draw call merging. */
3180 struct tc_draw_single *first = to_call(call, tc_draw_single);
3181 struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3182 struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3183
3184 /* If at least 2 consecutive draw calls can be merged... */
3185 if (next != last &&
3186 next->base.call_id == TC_CALL_draw_single) {
3187 simplify_draw_info(&first->info);
3188
3189 if (is_next_call_a_mergeable_draw(first, next)) {
3190 /* The maximum number of merged draws is given by the batch size. */
3191 struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3192 unsigned num_draws = 2;
3193 bool index_bias_varies = first->index_bias != next->index_bias;
3194
3195 /* u_threaded_context stores start/count in min/max_index for single draws. */
3196 multi[0].start = first->info.min_index;
3197 multi[0].count = first->info.max_index;
3198 multi[0].index_bias = first->index_bias;
3199 multi[1].start = next->info.min_index;
3200 multi[1].count = next->info.max_index;
3201 multi[1].index_bias = next->index_bias;
3202
3203 /* Find how many other draws can be merged. */
3204 next = get_next_call(next, tc_draw_single);
3205 for (; next != last && is_next_call_a_mergeable_draw(first, next);
3206 next = get_next_call(next, tc_draw_single), num_draws++) {
3207 /* u_threaded_context stores start/count in min/max_index for single draws. */
3208 multi[num_draws].start = next->info.min_index;
3209 multi[num_draws].count = next->info.max_index;
3210 multi[num_draws].index_bias = next->index_bias;
3211 index_bias_varies |= first->index_bias != next->index_bias;
3212 }
3213
3214 first->info.index_bias_varies = index_bias_varies;
3215 pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3216
3217 /* Since all draws use the same index buffer, drop all references at once. */
3218 if (first->info.index_size)
3219 pipe_drop_resource_references(first->info.index.resource, num_draws);
3220
3221 return call_size(tc_draw_single) * num_draws;
3222 }
3223 }
3224
3225 /* u_threaded_context stores start/count in min/max_index for single draws. */
3226 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3227 struct pipe_draw_start_count_bias draw;
3228
3229 draw.start = first->info.min_index;
3230 draw.count = first->info.max_index;
3231 draw.index_bias = first->index_bias;
3232
3233 first->info.index_bounds_valid = false;
3234 first->info.has_user_indices = false;
3235 first->info.take_index_buffer_ownership = false;
3236
3237 pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3238 if (first->info.index_size)
3239 tc_drop_resource_reference(first->info.index.resource);
3240
3241 return call_size(tc_draw_single);
3242 }
3243
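/* Worked example (illustrative): a frontend that issues many small indexed
 * draws with otherwise unchanged state, e.g.
 *
 *    for (i = 0; i < 100; i++)
 *       glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, offset[i]);
 *
 * ends up as up to 100 consecutive TC_CALL_draw_single entries in one batch
 * (limited by batch capacity). The merging path in tc_call_draw_single above
 * then replays them as a single
 * pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws) call, with
 * each start/count recovered from the min_index/max_index fields where
 * u_threaded_context stashed them, and index_bias_varies set if the draws
 * used different base vertices.
 */
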
3244 struct tc_draw_indirect {
3245 struct tc_call_base base;
3246 struct pipe_draw_start_count_bias draw;
3247 struct pipe_draw_info info;
3248 struct pipe_draw_indirect_info indirect;
3249 };
3250
3251 static uint16_t
3252 tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3253 {
3254 struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3255
3256 info->info.index_bounds_valid = false;
3257 info->info.take_index_buffer_ownership = false;
3258
3259 pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3260 if (info->info.index_size)
3261 tc_drop_resource_reference(info->info.index.resource);
3262
3263 tc_drop_resource_reference(info->indirect.buffer);
3264 tc_drop_resource_reference(info->indirect.indirect_draw_count);
3265 tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3266 return call_size(tc_draw_indirect);
3267 }
3268
3269 struct tc_draw_multi {
3270 struct tc_call_base base;
3271 unsigned num_draws;
3272 struct pipe_draw_info info;
3273 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3274 };
3275
3276 static uint16_t
3277 tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3278 {
3279 struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3280
3281 info->info.has_user_indices = false;
3282 info->info.index_bounds_valid = false;
3283 info->info.take_index_buffer_ownership = false;
3284
3285 pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3286 if (info->info.index_size)
3287 tc_drop_resource_reference(info->info.index.resource);
3288
3289 return info->base.num_slots;
3290 }
3291
3292 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3293 offsetof(struct pipe_draw_info, index)
3294
3295 void
3296 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3297 unsigned drawid_offset,
3298 const struct pipe_draw_indirect_info *indirect,
3299 const struct pipe_draw_start_count_bias *draws,
3300 unsigned num_draws)
3301 {
3302 STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3303 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3304
3305 struct threaded_context *tc = threaded_context(_pipe);
3306 unsigned index_size = info->index_size;
3307 bool has_user_indices = info->has_user_indices;
3308
3309 if (unlikely(indirect)) {
3310 assert(!has_user_indices);
3311 assert(num_draws == 1);
3312
3313 struct tc_draw_indirect *p =
3314 tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3315 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3316
3317 if (index_size) {
3318 if (!info->take_index_buffer_ownership) {
3319 tc_set_resource_reference(&p->info.index.resource,
3320 info->index.resource);
3321 }
3322 tc_add_to_buffer_list(tc, next, info->index.resource);
3323 }
3324 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3325
3326 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3327 tc_set_resource_reference(&p->indirect.indirect_draw_count,
3328 indirect->indirect_draw_count);
3329 p->indirect.count_from_stream_output = NULL;
3330 pipe_so_target_reference(&p->indirect.count_from_stream_output,
3331 indirect->count_from_stream_output);
3332
3333 if (indirect->buffer)
3334 tc_add_to_buffer_list(tc, next, indirect->buffer);
3335 if (indirect->indirect_draw_count)
3336 tc_add_to_buffer_list(tc, next, indirect->indirect_draw_count);
3337 if (indirect->count_from_stream_output)
3338 tc_add_to_buffer_list(tc, next, indirect->count_from_stream_output->buffer);
3339
3340 memcpy(&p->indirect, indirect, sizeof(*indirect));
3341 p->draw.start = draws[0].start;
3342
3343 /* This must be after tc_add_call, which can flush the batch. */
3344 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3345 tc_add_all_gfx_bindings_to_buffer_list(tc);
3346 return;
3347 }
3348
3349 if (num_draws == 1) {
3350 /* Single draw. */
3351 if (index_size && has_user_indices) {
3352 unsigned size = draws[0].count * index_size;
3353 struct pipe_resource *buffer = NULL;
3354 unsigned offset;
3355
3356 if (!size)
3357 return;
3358
3359 /* This must be done before adding draw_vbo, because it could generate
3360 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3361 * to the driver if it was done afterwards.
3362 */
3363 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3364 (uint8_t*)info->index.user + draws[0].start * index_size,
3365 &offset, &buffer);
3366 if (unlikely(!buffer))
3367 return;
3368
3369 struct tc_draw_single *p = drawid_offset > 0 ?
3370 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3371 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3372 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3373 p->info.index.resource = buffer;
3374 if (drawid_offset > 0)
3375 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3376 /* u_threaded_context stores start/count in min/max_index for single draws. */
3377 p->info.min_index = offset >> util_logbase2(index_size);
3378 p->info.max_index = draws[0].count;
3379 p->index_bias = draws[0].index_bias;
3380 } else {
3381 /* Non-indexed call or indexed with a real index buffer. */
3382 struct tc_draw_single *p = drawid_offset > 0 ?
3383 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3384 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3385 if (index_size) {
3386 if (!info->take_index_buffer_ownership) {
3387 tc_set_resource_reference(&p->info.index.resource,
3388 info->index.resource);
3389 }
3390 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3391 }
3392 if (drawid_offset > 0)
3393 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3394 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3395 /* u_threaded_context stores start/count in min/max_index for single draws. */
3396 p->info.min_index = draws[0].start;
3397 p->info.max_index = draws[0].count;
3398 p->index_bias = draws[0].index_bias;
3399 }
3400
3401 /* This must be after tc_add_call, which can flush the batch. */
3402 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3403 tc_add_all_gfx_bindings_to_buffer_list(tc);
3404 return;
3405 }
3406
3407 const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3408 const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3409 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3410 sizeof(struct tc_call_base));
3411 /* Multi draw. */
3412 if (index_size && has_user_indices) {
3413 struct pipe_resource *buffer = NULL;
3414 unsigned buffer_offset, total_count = 0;
3415 unsigned index_size_shift = util_logbase2(index_size);
3416 uint8_t *ptr = NULL;
3417
3418 /* Get the total count. */
3419 for (unsigned i = 0; i < num_draws; i++)
3420 total_count += draws[i].count;
3421
3422 if (!total_count)
3423 return;
3424
3425 /* Allocate space for all index buffers.
3426 *
3427 * This must be done before adding draw_vbo, because it could generate
3428 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3429 * to the driver if it was done afterwards.
3430 */
3431 u_upload_alloc(tc->base.stream_uploader, 0,
3432 total_count << index_size_shift, 4,
3433 &buffer_offset, &buffer, (void**)&ptr);
3434 if (unlikely(!buffer))
3435 return;
3436
3437 int total_offset = 0;
3438 unsigned offset = 0;
3439 while (num_draws) {
3440 struct tc_batch *next = &tc->batch_slots[tc->next];
3441
3442 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3443 /* If there isn't enough room for even one draw, fill the next batch instead. */
3444 if (nb_slots_left < slots_for_one_draw)
3445 nb_slots_left = TC_SLOTS_PER_BATCH;
3446 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3447
3448 /* How many draws can we fit in the current batch */
3449 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3450
3451 struct tc_draw_multi *p =
3452 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3453 dr);
3454 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3455
3456 if (total_offset == 0)
3457 /* the first slot inherits the reference from u_upload_alloc() */
3458 p->info.index.resource = buffer;
3459 else
3460 /* all following slots need a new reference */
3461 tc_set_resource_reference(&p->info.index.resource, buffer);
3462
3463 p->num_draws = dr;
3464
3465 /* Upload index buffers. */
3466 for (unsigned i = 0; i < dr; i++) {
3467 unsigned count = draws[i + total_offset].count;
3468
3469 if (!count) {
3470 p->slot[i].start = 0;
3471 p->slot[i].count = 0;
3472 p->slot[i].index_bias = 0;
3473 continue;
3474 }
3475
3476 unsigned size = count << index_size_shift;
3477 memcpy(ptr + offset,
3478 (uint8_t*)info->index.user +
3479 (draws[i + total_offset].start << index_size_shift), size);
3480 p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3481 p->slot[i].count = count;
3482 p->slot[i].index_bias = draws[i + total_offset].index_bias;
3483 offset += size;
3484 }
3485
3486 total_offset += dr;
3487 num_draws -= dr;
3488 }
3489 } else {
3490 int total_offset = 0;
3491 bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3492 while (num_draws) {
3493 struct tc_batch *next = &tc->batch_slots[tc->next];
3494
3495 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3496 /* If there isn't enough room for even one draw, fill the next batch instead. */
3497 if (nb_slots_left < slots_for_one_draw)
3498 nb_slots_left = TC_SLOTS_PER_BATCH;
3499 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3500
3501 /* How many draws can we fit in the current batch */
3502 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3503
3504 /* Non-indexed call or indexed with a real index buffer. */
3505 struct tc_draw_multi *p =
3506 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3507 dr);
3508 if (index_size) {
3509 if (!take_index_buffer_ownership) {
3510 tc_set_resource_reference(&p->info.index.resource,
3511 info->index.resource);
3512 }
3513 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3514 }
3515 take_index_buffer_ownership = false;
3516 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3517 p->num_draws = dr;
3518 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3519 num_draws -= dr;
3520
3521 total_offset += dr;
3522 }
3523 }
3524
3525 /* This must be after tc_add_*call, which can flush the batch. */
3526 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3527 tc_add_all_gfx_bindings_to_buffer_list(tc);
3528 }
3529
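/* Sizing example with made-up numbers (purely illustrative): if
 * sizeof(struct tc_call_base) were 8, sizeof(struct tc_draw_multi) 56, and
 * one slot entry 12 bytes, then slots_for_one_draw = DIV_ROUND_UP(56 + 12, 8)
 * = 9 call slots. With 100 slots left in the batch, size_left_bytes = 800 and
 * at most (800 - 56) / 12 = 62 draws go into the current tc_draw_multi; the
 * remaining draws start a new tc_draw_multi in the next batch. The real sizes
 * depend on the build; only the arithmetic is the point here.
 */
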
3530 struct tc_draw_vstate_single {
3531 struct tc_call_base base;
3532 struct pipe_draw_start_count_bias draw;
3533
3534 /* The following fields must be laid out contiguously (no holes) because
3535 * draw merging compares them with a single memcmp.
3536 */
3537 struct pipe_vertex_state *state;
3538 uint32_t partial_velem_mask;
3539 struct pipe_draw_vertex_state_info info;
3540 };
3541
3542 static bool
3543 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3544 struct tc_draw_vstate_single *next)
3545 {
3546 if (next->base.call_id != TC_CALL_draw_vstate_single)
3547 return false;
3548
3549 return !memcmp(&first->state, &next->state,
3550 offsetof(struct tc_draw_vstate_single, info) +
3551 sizeof(struct pipe_draw_vertex_state_info) -
3552 offsetof(struct tc_draw_vstate_single, state));
3553 }
3554
3555 static uint16_t
3556 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3557 {
3558 /* Draw call merging. */
3559 struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3560 struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3561 struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3562
3563 /* If at least 2 consecutive draw calls can be merged... */
3564 if (next != last &&
3565 is_next_call_a_mergeable_draw_vstate(first, next)) {
3566 /* The maximum number of merged draws is given by the batch size. */
3567 struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3568 call_size(tc_draw_vstate_single)];
3569 unsigned num_draws = 2;
3570
3571 draws[0] = first->draw;
3572 draws[1] = next->draw;
3573
3574 /* Find how many other draws can be merged. */
3575 next = get_next_call(next, tc_draw_vstate_single);
3576 for (; next != last &&
3577 is_next_call_a_mergeable_draw_vstate(first, next);
3578 next = get_next_call(next, tc_draw_vstate_single),
3579 num_draws++)
3580 draws[num_draws] = next->draw;
3581
3582 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3583 first->info, draws, num_draws);
3584 /* Since all draws use the same state, drop all references at once. */
3585 tc_drop_vertex_state_references(first->state, num_draws);
3586
3587 return call_size(tc_draw_vstate_single) * num_draws;
3588 }
3589
3590 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3591 first->info, &first->draw, 1);
3592 tc_drop_vertex_state_references(first->state, 1);
3593 return call_size(tc_draw_vstate_single);
3594 }
3595
3596 struct tc_draw_vstate_multi {
3597 struct tc_call_base base;
3598 uint32_t partial_velem_mask;
3599 struct pipe_draw_vertex_state_info info;
3600 unsigned num_draws;
3601 struct pipe_vertex_state *state;
3602 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3603 };
3604
3605 static uint16_t
3606 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3607 {
3608 struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3609
3610 pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3611 info->info, info->slot, info->num_draws);
3612 tc_drop_vertex_state_references(info->state, 1);
3613 return info->base.num_slots;
3614 }
3615
3616 static void
3617 tc_draw_vertex_state(struct pipe_context *_pipe,
3618 struct pipe_vertex_state *state,
3619 uint32_t partial_velem_mask,
3620 struct pipe_draw_vertex_state_info info,
3621 const struct pipe_draw_start_count_bias *draws,
3622 unsigned num_draws)
3623 {
3624 struct threaded_context *tc = threaded_context(_pipe);
3625
3626 if (num_draws == 1) {
3627 /* Single draw. */
3628 struct tc_draw_vstate_single *p =
3629 tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3630 p->partial_velem_mask = partial_velem_mask;
3631 p->draw = draws[0];
3632 p->info.mode = info.mode;
3633 p->info.take_vertex_state_ownership = false;
3634
3635 /* This should always be 0 for simplicity, because we assume that
3636 * index_bias doesn't vary.
3637 */
3638 assert(draws[0].index_bias == 0);
3639
3640 if (!info.take_vertex_state_ownership)
3641 tc_set_vertex_state_reference(&p->state, state);
3642 else
3643 p->state = state;
3644
3645
3646 /* This must be after tc_add_*call, which can flush the batch. */
3647 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3648 tc_add_all_gfx_bindings_to_buffer_list(tc);
3649 return;
3650 }
3651
3652 const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3653 const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3654 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3655 sizeof(struct tc_call_base));
3656 /* Multi draw. */
3657 int total_offset = 0;
3658 bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3659 while (num_draws) {
3660 struct tc_batch *next = &tc->batch_slots[tc->next];
3661
3662 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3663 /* If there isn't enough room for even one draw, fill the next batch instead. */
3664 if (nb_slots_left < slots_for_one_draw)
3665 nb_slots_left = TC_SLOTS_PER_BATCH;
3666 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3667
3668 /* How many draws can we fit in the current batch */
3669 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3670
3671 /* Non-indexed call or indexed with a real index buffer. */
3672 struct tc_draw_vstate_multi *p =
3673 tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3674
3675 if (!take_vertex_state_ownership)
3676 tc_set_vertex_state_reference(&p->state, state);
3677 else
3678 p->state = state;
3679
3680 take_vertex_state_ownership = false;
3681 p->partial_velem_mask = partial_velem_mask;
3682 p->info.mode = info.mode;
3683 p->info.take_vertex_state_ownership = false;
3684 p->num_draws = dr;
3685 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3686 num_draws -= dr;
3687
3688 total_offset += dr;
3689 }
3690
3691
3692 /* This must be after tc_add_*call, which can flush the batch. */
3693 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3694 tc_add_all_gfx_bindings_to_buffer_list(tc);
3695 }
3696
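/* Reference-counting note for the loop above: when the caller passed
 * take_vertex_state_ownership, only the first chunk consumes the caller's
 * reference (p->state = state); every later chunk takes its own reference via
 * tc_set_vertex_state_reference, and each executed tc_call_draw_vstate_multi
 * drops exactly one reference again.
 */
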
3697 struct tc_launch_grid_call {
3698 struct tc_call_base base;
3699 struct pipe_grid_info info;
3700 };
3701
3702 static uint16_t
3703 tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3704 {
3705 struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3706
3707 pipe->launch_grid(pipe, p);
3708 tc_drop_resource_reference(p->indirect);
3709 return call_size(tc_launch_grid_call);
3710 }
3711
3712 static void
3713 tc_launch_grid(struct pipe_context *_pipe,
3714 const struct pipe_grid_info *info)
3715 {
3716 struct threaded_context *tc = threaded_context(_pipe);
3717 struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3718 tc_launch_grid_call);
3719 assert(info->input == NULL);
3720
3721 tc_set_resource_reference(&p->info.indirect, info->indirect);
3722 memcpy(&p->info, info, sizeof(*info));
3723
3724 if (info->indirect)
3725 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->indirect);
3726
3727 /* This must be after tc_add_*call, which can flush the batch. */
3728 if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3729 tc_add_all_compute_bindings_to_buffer_list(tc);
3730 }
3731
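/* Note (descriptive, added for clarity): referencing info->indirect and
 * adding it to the current buffer list lets the buffer-busy tracking
 * elsewhere in this file see that a queued compute launch may still read the
 * indirect buffer, e.g. when deciding whether a later map of that buffer
 * needs to synchronize.
 */
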
3732 static uint16_t
3733 tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3734 {
3735 struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3736
3737 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3738 p->dstz, p->src, p->src_level, &p->src_box);
3739 tc_drop_resource_reference(p->dst);
3740 tc_drop_resource_reference(p->src);
3741 return call_size(tc_resource_copy_region);
3742 }
3743
3744 static void
3745 tc_resource_copy_region(struct pipe_context *_pipe,
3746 struct pipe_resource *dst, unsigned dst_level,
3747 unsigned dstx, unsigned dsty, unsigned dstz,
3748 struct pipe_resource *src, unsigned src_level,
3749 const struct pipe_box *src_box)
3750 {
3751 struct threaded_context *tc = threaded_context(_pipe);
3752 struct threaded_resource *tdst = threaded_resource(dst);
3753 struct tc_resource_copy_region *p =
3754 tc_add_call(tc, TC_CALL_resource_copy_region,
3755 tc_resource_copy_region);
3756
3757 if (dst->target == PIPE_BUFFER)
3758 tc_buffer_disable_cpu_storage(dst);
3759
3760 tc_set_resource_reference(&p->dst, dst);
3761 p->dst_level = dst_level;
3762 p->dstx = dstx;
3763 p->dsty = dsty;
3764 p->dstz = dstz;
3765 tc_set_resource_reference(&p->src, src);
3766 p->src_level = src_level;
3767 p->src_box = *src_box;
3768
3769 if (dst->target == PIPE_BUFFER) {
3770 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3771
3772 tc_add_to_buffer_list(tc, next, src);
3773 tc_add_to_buffer_list(tc, next, dst);
3774
3775 util_range_add(&tdst->b, &tdst->valid_buffer_range,
3776 dstx, dstx + src_box->width);
3777 }
3778 }
3779
3780 struct tc_blit_call {
3781 struct tc_call_base base;
3782 struct pipe_blit_info info;
3783 };
3784
3785 static uint16_t
3786 tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3787 {
3788 struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3789
3790 pipe->blit(pipe, blit);
3791 tc_drop_resource_reference(blit->dst.resource);
3792 tc_drop_resource_reference(blit->src.resource);
3793 return call_size(tc_blit_call);
3794 }
3795
3796 static void
3797 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3798 {
3799 struct threaded_context *tc = threaded_context(_pipe);
3800 struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3801
3802 tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3803 tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3804 memcpy(&blit->info, info, sizeof(*info));
3805 }
3806
3807 struct tc_generate_mipmap {
3808 struct tc_call_base base;
3809 enum pipe_format format;
3810 unsigned base_level;
3811 unsigned last_level;
3812 unsigned first_layer;
3813 unsigned last_layer;
3814 struct pipe_resource *res;
3815 };
3816
3817 static uint16_t
3818 tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3819 {
3820 struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3821 ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3822 p->base_level,
3823 p->last_level,
3824 p->first_layer,
3825 p->last_layer);
3826 assert(result);
3827 tc_drop_resource_reference(p->res);
3828 return call_size(tc_generate_mipmap);
3829 }
3830
3831 static bool
3832 tc_generate_mipmap(struct pipe_context *_pipe,
3833 struct pipe_resource *res,
3834 enum pipe_format format,
3835 unsigned base_level,
3836 unsigned last_level,
3837 unsigned first_layer,
3838 unsigned last_layer)
3839 {
3840 struct threaded_context *tc = threaded_context(_pipe);
3841 struct pipe_context *pipe = tc->pipe;
3842 struct pipe_screen *screen = pipe->screen;
3843 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3844
3845 if (util_format_is_depth_or_stencil(format))
3846 bind = PIPE_BIND_DEPTH_STENCIL;
3847 else
3848 bind = PIPE_BIND_RENDER_TARGET;
3849
3850 if (!screen->is_format_supported(screen, format, res->target,
3851 res->nr_samples, res->nr_storage_samples,
3852 bind))
3853 return false;
3854
3855 struct tc_generate_mipmap *p =
3856 tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3857
3858 tc_set_resource_reference(&p->res, res);
3859 p->format = format;
3860 p->base_level = base_level;
3861 p->last_level = last_level;
3862 p->first_layer = first_layer;
3863 p->last_layer = last_layer;
3864 return true;
3865 }
3866
3867 struct tc_resource_call {
3868 struct tc_call_base base;
3869 struct pipe_resource *resource;
3870 };
3871
3872 static uint16_t
3873 tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3874 {
3875 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3876
3877 pipe->flush_resource(pipe, resource);
3878 tc_drop_resource_reference(resource);
3879 return call_size(tc_resource_call);
3880 }
3881
3882 static void
3883 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3884 {
3885 struct threaded_context *tc = threaded_context(_pipe);
3886 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3887 tc_resource_call);
3888
3889 tc_set_resource_reference(&call->resource, resource);
3890 }
3891
3892 static uint16_t
3893 tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3894 {
3895 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3896
3897 pipe->invalidate_resource(pipe, resource);
3898 tc_drop_resource_reference(resource);
3899 return call_size(tc_resource_call);
3900 }
3901
3902 static void
3903 tc_invalidate_resource(struct pipe_context *_pipe,
3904 struct pipe_resource *resource)
3905 {
3906 struct threaded_context *tc = threaded_context(_pipe);
3907
3908 if (resource->target == PIPE_BUFFER) {
3909 /* This can fail, in which case we simply ignore the invalidation request. */
3910 struct threaded_resource *tbuf = threaded_resource(resource);
3911 tc_touch_buffer(tc, tbuf);
3912 tc_invalidate_buffer(tc, tbuf);
3913 return;
3914 }
3915
3916 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3917 tc_resource_call);
3918 tc_set_resource_reference(&call->resource, resource);
3919 }
3920
3921 struct tc_clear {
3922 struct tc_call_base base;
3923 bool scissor_state_set;
3924 uint8_t stencil;
3925 uint16_t buffers;
3926 float depth;
3927 struct pipe_scissor_state scissor_state;
3928 union pipe_color_union color;
3929 };
3930
3931 static uint16_t
3932 tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3933 {
3934 struct tc_clear *p = to_call(call, tc_clear);
3935
3936 pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3937 return call_size(tc_clear);
3938 }
3939
3940 static void
3941 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3942 const union pipe_color_union *color, double depth,
3943 unsigned stencil)
3944 {
3945 struct threaded_context *tc = threaded_context(_pipe);
3946 struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3947
3948 p->buffers = buffers;
3949 if (scissor_state)
3950 p->scissor_state = *scissor_state;
3951 p->scissor_state_set = !!scissor_state;
3952 p->color = *color;
3953 p->depth = depth;
3954 p->stencil = stencil;
3955 }
3956
3957 struct tc_clear_render_target {
3958 struct tc_call_base base;
3959 bool render_condition_enabled;
3960 unsigned dstx;
3961 unsigned dsty;
3962 unsigned width;
3963 unsigned height;
3964 union pipe_color_union color;
3965 struct pipe_surface *dst;
3966 };
3967
3968 static uint16_t
3969 tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3970 {
3971 struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3972
3973 pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3974 p->render_condition_enabled);
3975 tc_drop_surface_reference(p->dst);
3976 return call_size(tc_clear_render_target);
3977 }
3978
3979 static void
3980 tc_clear_render_target(struct pipe_context *_pipe,
3981 struct pipe_surface *dst,
3982 const union pipe_color_union *color,
3983 unsigned dstx, unsigned dsty,
3984 unsigned width, unsigned height,
3985 bool render_condition_enabled)
3986 {
3987 struct threaded_context *tc = threaded_context(_pipe);
3988 struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3989 p->dst = NULL;
3990 pipe_surface_reference(&p->dst, dst);
3991 p->color = *color;
3992 p->dstx = dstx;
3993 p->dsty = dsty;
3994 p->width = width;
3995 p->height = height;
3996 p->render_condition_enabled = render_condition_enabled;
3997 }
3998
3999
4000 struct tc_clear_depth_stencil {
4001 struct tc_call_base base;
4002 bool render_condition_enabled;
4003 float depth;
4004 unsigned clear_flags;
4005 unsigned stencil;
4006 unsigned dstx;
4007 unsigned dsty;
4008 unsigned width;
4009 unsigned height;
4010 struct pipe_surface *dst;
4011 };
4012
4013
4014 static uint16_t
4015 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
4016 {
4017 struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4018
4019 pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4020 p->dstx, p->dsty, p->width, p->height,
4021 p->render_condition_enabled);
4022 tc_drop_surface_reference(p->dst);
4023 return call_size(tc_clear_depth_stencil);
4024 }
4025
4026 static void
4027 tc_clear_depth_stencil(struct pipe_context *_pipe,
4028 struct pipe_surface *dst, unsigned clear_flags,
4029 double depth, unsigned stencil, unsigned dstx,
4030 unsigned dsty, unsigned width, unsigned height,
4031 bool render_condition_enabled)
4032 {
4033 struct threaded_context *tc = threaded_context(_pipe);
4034 struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4035 p->dst = NULL;
4036 pipe_surface_reference(&p->dst, dst);
4037 p->clear_flags = clear_flags;
4038 p->depth = depth;
4039 p->stencil = stencil;
4040 p->dstx = dstx;
4041 p->dsty = dsty;
4042 p->width = width;
4043 p->height = height;
4044 p->render_condition_enabled = render_condition_enabled;
4045 }
4046
4047 struct tc_clear_buffer {
4048 struct tc_call_base base;
4049 uint8_t clear_value_size;
4050 unsigned offset;
4051 unsigned size;
4052 char clear_value[16];
4053 struct pipe_resource *res;
4054 };
4055
4056 static uint16_t
4057 tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
4058 {
4059 struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4060
4061 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4062 p->clear_value_size);
4063 tc_drop_resource_reference(p->res);
4064 return call_size(tc_clear_buffer);
4065 }
4066
4067 static void
4068 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4069 unsigned offset, unsigned size,
4070 const void *clear_value, int clear_value_size)
4071 {
4072 struct threaded_context *tc = threaded_context(_pipe);
4073 struct threaded_resource *tres = threaded_resource(res);
4074 struct tc_clear_buffer *p =
4075 tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4076
4077 tc_buffer_disable_cpu_storage(res);
4078
4079 tc_set_resource_reference(&p->res, res);
4080 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], res);
4081 p->offset = offset;
4082 p->size = size;
4083 memcpy(p->clear_value, clear_value, clear_value_size);
4084 p->clear_value_size = clear_value_size;
4085
4086 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4087 }
4088
4089 struct tc_clear_texture {
4090 struct tc_call_base base;
4091 unsigned level;
4092 struct pipe_box box;
4093 char data[16];
4094 struct pipe_resource *res;
4095 };
4096
4097 static uint16_t
4098 tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
4099 {
4100 struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4101
4102 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4103 tc_drop_resource_reference(p->res);
4104 return call_size(tc_clear_texture);
4105 }
4106
4107 static void
4108 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4109 unsigned level, const struct pipe_box *box, const void *data)
4110 {
4111 struct threaded_context *tc = threaded_context(_pipe);
4112 struct tc_clear_texture *p =
4113 tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4114
4115 tc_set_resource_reference(&p->res, res);
4116 p->level = level;
4117 p->box = *box;
4118 memcpy(p->data, data,
4119 util_format_get_blocksize(res->format));
4120 }
4121
4122 struct tc_resource_commit {
4123 struct tc_call_base base;
4124 bool commit;
4125 unsigned level;
4126 struct pipe_box box;
4127 struct pipe_resource *res;
4128 };
4129
4130 static uint16_t
4131 tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
4132 {
4133 struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4134
4135 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4136 tc_drop_resource_reference(p->res);
4137 return call_size(tc_resource_commit);
4138 }
4139
4140 static bool
4141 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4142 unsigned level, struct pipe_box *box, bool commit)
4143 {
4144 struct threaded_context *tc = threaded_context(_pipe);
4145 struct tc_resource_commit *p =
4146 tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4147
4148 tc_set_resource_reference(&p->res, res);
4149 p->level = level;
4150 p->box = *box;
4151 p->commit = commit;
4152 return true; /* we don't care about the return value for this call */
4153 }
4154
4155 static unsigned
4156 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4157 {
4158 struct threaded_context *tc = threaded_context(_pipe);
4159 struct pipe_context *pipe = tc->pipe;
4160
4161 return pipe->init_intel_perf_query_info(pipe);
4162 }
4163
4164 static void
4165 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4166 unsigned query_index,
4167 const char **name,
4168 uint32_t *data_size,
4169 uint32_t *n_counters,
4170 uint32_t *n_active)
4171 {
4172 struct threaded_context *tc = threaded_context(_pipe);
4173 struct pipe_context *pipe = tc->pipe;
4174
4175 tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4176 pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4177 n_counters, n_active);
4178 }
4179
4180 static void
4181 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4182 unsigned query_index,
4183 unsigned counter_index,
4184 const char **name,
4185 const char **desc,
4186 uint32_t *offset,
4187 uint32_t *data_size,
4188 uint32_t *type_enum,
4189 uint32_t *data_type_enum,
4190 uint64_t *raw_max)
4191 {
4192 struct threaded_context *tc = threaded_context(_pipe);
4193 struct pipe_context *pipe = tc->pipe;
4194
4195 pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4196 name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4197 }
4198
4199 static struct pipe_query *
4200 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4201 {
4202 struct threaded_context *tc = threaded_context(_pipe);
4203 struct pipe_context *pipe = tc->pipe;
4204
4205 return pipe->new_intel_perf_query_obj(pipe, query_index);
4206 }
4207
4208 static uint16_t
4209 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4210 {
4211 (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4212 return call_size(tc_query_call);
4213 }
4214
4215 static bool
4216 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4217 {
4218 struct threaded_context *tc = threaded_context(_pipe);
4219
4220 tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4221
4222 /* Assume success; a begin failure can be signaled later from get_intel_perf_query_data. */
4223 return true;
4224 }
4225
4226 static uint16_t
4227 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4228 {
4229 pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4230 return call_size(tc_query_call);
4231 }
4232
4233 static void
4234 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4235 {
4236 struct threaded_context *tc = threaded_context(_pipe);
4237
4238 tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4239 }
4240
4241 static void
4242 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4243 {
4244 struct threaded_context *tc = threaded_context(_pipe);
4245 struct pipe_context *pipe = tc->pipe;
4246
4247 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4248 pipe->delete_intel_perf_query(pipe, q);
4249 }
4250
4251 static void
4252 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4253 {
4254 struct threaded_context *tc = threaded_context(_pipe);
4255 struct pipe_context *pipe = tc->pipe;
4256
4257 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4258 pipe->wait_intel_perf_query(pipe, q);
4259 }
4260
4261 static bool
4262 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4263 {
4264 struct threaded_context *tc = threaded_context(_pipe);
4265 struct pipe_context *pipe = tc->pipe;
4266
4267 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4268 return pipe->is_intel_perf_query_ready(pipe, q);
4269 }
4270
4271 static bool
4272 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4273 struct pipe_query *q,
4274 size_t data_size,
4275 uint32_t *data,
4276 uint32_t *bytes_written)
4277 {
4278 struct threaded_context *tc = threaded_context(_pipe);
4279 struct pipe_context *pipe = tc->pipe;
4280
4281 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4282 return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4283 }
4284
4285 /********************************************************************
4286 * callback
4287 */
4288
4289 struct tc_callback_call {
4290 struct tc_call_base base;
4291 void (*fn)(void *data);
4292 void *data;
4293 };
4294
4295 static uint16_t
4296 tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4297 {
4298 struct tc_callback_call *p = to_call(call, tc_callback_call);
4299
4300 p->fn(p->data);
4301 return call_size(tc_callback_call);
4302 }
4303
4304 static void
4305 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4306 bool asap)
4307 {
4308 struct threaded_context *tc = threaded_context(_pipe);
4309
4310 if (asap && tc_is_sync(tc)) {
4311 fn(data);
4312 return;
4313 }
4314
4315 struct tc_callback_call *p =
4316 tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4317 p->fn = fn;
4318 p->data = data;
4319 }
4320
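/* Usage sketch (illustrative; free_query_slot and slot are hypothetical):
 *
 *    ctx->callback(ctx, free_query_slot, slot, true);   (last argument: asap)
 *
 * This runs free_query_slot(slot) immediately when the queue is idle and
 * otherwise enqueues it, so it executes on the driver thread in submission
 * order. The data pointer must stay valid until the callback has run.
 */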
4321
4322 /********************************************************************
4323 * create & destroy
4324 */
4325
4326 static void
4327 tc_destroy(struct pipe_context *_pipe)
4328 {
4329 struct threaded_context *tc = threaded_context(_pipe);
4330 struct pipe_context *pipe = tc->pipe;
4331
4332 if (tc->base.const_uploader &&
4333 tc->base.stream_uploader != tc->base.const_uploader)
4334 u_upload_destroy(tc->base.const_uploader);
4335
4336 if (tc->base.stream_uploader)
4337 u_upload_destroy(tc->base.stream_uploader);
4338
4339 tc_sync(tc);
4340
4341 if (util_queue_is_initialized(&tc->queue)) {
4342 util_queue_destroy(&tc->queue);
4343
4344 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4345 util_queue_fence_destroy(&tc->batch_slots[i].fence);
4346 assert(!tc->batch_slots[i].token);
4347 }
4348 }
4349
4350 slab_destroy_child(&tc->pool_transfers);
4351 assert(tc->batch_slots[tc->next].num_total_slots == 0);
4352 pipe->destroy(pipe);
4353
4354 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
4355 if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
4356 util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
4357 util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
4358 }
4359
4360 FREE(tc);
4361 }
4362
4363 static const tc_execute execute_func[TC_NUM_CALLS] = {
4364 #define CALL(name) tc_call_##name,
4365 #include "u_threaded_context_calls.h"
4366 #undef CALL
4367 };
4368
4369 void tc_driver_internal_flush_notify(struct threaded_context *tc)
4370 {
4371 /* Allow drivers to call this function even for internal contexts that
4372 * don't have tc. It simplifies drivers.
4373 */
4374 if (!tc)
4375 return;
4376
4377 /* Signal fences set by tc_batch_execute. */
4378 for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4379 util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4380
4381 tc->num_signal_fences_next_flush = 0;
4382 }
4383
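/* Call-site sketch (illustrative; the mydrv_* names are hypothetical): a
 * driver that flushes internally, outside of tc_flush, should end that path
 * with something like
 *
 *    static void mydrv_internal_flush(struct mydrv_context *ctx)
 *    {
 *       ...submit the command buffer...
 *       tc_driver_internal_flush_notify(ctx->tc);
 *    }
 *
 * so that fences registered by tc_batch_execute for the next flush get
 * signaled even though the flush did not go through tc.
 */
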
4384 /**
4385 * Wrap an existing pipe_context into a threaded_context.
4386 *
4387 * \param pipe pipe_context to wrap
4388 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
4389 * transfer objects; the driver should have one
4390 * in pipe_screen.
4391 * \param replace_buffer callback for replacing a pipe_resource's storage
4392 * with another pipe_resource's storage.
4393 * \param options optional TC options/callbacks
4394 * \param out if successful, the threaded_context will be returned here in
4395 * addition to the return value if "out" != NULL
4396 */
4397 struct pipe_context *
4398 threaded_context_create(struct pipe_context *pipe,
4399 struct slab_parent_pool *parent_transfer_pool,
4400 tc_replace_buffer_storage_func replace_buffer,
4401 const struct threaded_context_options *options,
4402 struct threaded_context **out)
4403 {
4404 struct threaded_context *tc;
4405
4406 if (!pipe)
4407 return NULL;
4408
4409 if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4410 return pipe;
4411
4412 tc = CALLOC_STRUCT(threaded_context);
4413 if (!tc) {
4414 pipe->destroy(pipe);
4415 return NULL;
4416 }
4417
4418 if (options)
4419 tc->options = *options;
4420
4421 pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4422
4423 /* The driver context isn't wrapped, so set its "priv" to NULL. */
4424 pipe->priv = NULL;
4425
4426 tc->pipe = pipe;
4427 tc->replace_buffer_storage = replace_buffer;
4428 tc->map_buffer_alignment =
4429 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4430 tc->ubo_alignment =
4431 MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4432 tc->base.priv = pipe; /* priv points to the wrapped driver context */
4433 tc->base.screen = pipe->screen;
4434 tc->base.destroy = tc_destroy;
4435 tc->base.callback = tc_callback;
4436
4437 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4438 if (pipe->stream_uploader == pipe->const_uploader)
4439 tc->base.const_uploader = tc->base.stream_uploader;
4440 else
4441 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4442
4443 if (!tc->base.stream_uploader || !tc->base.const_uploader)
4444 goto fail;
4445
4446 tc->use_forced_staging_uploads = true;
4447
4448 /* The queue size is the number of batches "waiting". Batches are removed
4449 * from the queue before being executed, so keep one tc_batch slot for that
4450 * execution. Also, keep one unused slot for an unflushed batch.
4451 */
4452 if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4453 goto fail;
4454
4455 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4456 #if !defined(NDEBUG) && TC_DEBUG >= 1
4457 tc->batch_slots[i].sentinel = TC_SENTINEL;
4458 #endif
4459 tc->batch_slots[i].tc = tc;
4460 util_queue_fence_init(&tc->batch_slots[i].fence);
4461 }
4462 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4463 util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4464
4465 list_inithead(&tc->unflushed_queries);
4466
4467 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4468
4469 /* If you have different limits in each shader stage, set the maximum. */
4470 struct pipe_screen *screen = pipe->screen;
4471 tc->max_vertex_buffers =
4472 screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4473 tc->max_const_buffers =
4474 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4475 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4476 tc->max_shader_buffers =
4477 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4478 PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4479 tc->max_images =
4480 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4481 PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4482 tc->max_samplers =
4483 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4484 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4485
4486 tc->base.set_context_param = tc_set_context_param; /* always set this */
4487
4488 #define CTX_INIT(_member) \
4489 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
4490
4491 CTX_INIT(flush);
4492 CTX_INIT(draw_vbo);
4493 CTX_INIT(draw_vertex_state);
4494 CTX_INIT(launch_grid);
4495 CTX_INIT(resource_copy_region);
4496 CTX_INIT(blit);
4497 CTX_INIT(clear);
4498 CTX_INIT(clear_render_target);
4499 CTX_INIT(clear_depth_stencil);
4500 CTX_INIT(clear_buffer);
4501 CTX_INIT(clear_texture);
4502 CTX_INIT(flush_resource);
4503 CTX_INIT(generate_mipmap);
4504 CTX_INIT(render_condition);
4505 CTX_INIT(create_query);
4506 CTX_INIT(create_batch_query);
4507 CTX_INIT(destroy_query);
4508 CTX_INIT(begin_query);
4509 CTX_INIT(end_query);
4510 CTX_INIT(get_query_result);
4511 CTX_INIT(get_query_result_resource);
4512 CTX_INIT(set_active_query_state);
4513 CTX_INIT(create_blend_state);
4514 CTX_INIT(bind_blend_state);
4515 CTX_INIT(delete_blend_state);
4516 CTX_INIT(create_sampler_state);
4517 CTX_INIT(bind_sampler_states);
4518 CTX_INIT(delete_sampler_state);
4519 CTX_INIT(create_rasterizer_state);
4520 CTX_INIT(bind_rasterizer_state);
4521 CTX_INIT(delete_rasterizer_state);
4522 CTX_INIT(create_depth_stencil_alpha_state);
4523 CTX_INIT(bind_depth_stencil_alpha_state);
4524 CTX_INIT(delete_depth_stencil_alpha_state);
4525 CTX_INIT(link_shader);
4526 CTX_INIT(create_fs_state);
4527 CTX_INIT(bind_fs_state);
4528 CTX_INIT(delete_fs_state);
4529 CTX_INIT(create_vs_state);
4530 CTX_INIT(bind_vs_state);
4531 CTX_INIT(delete_vs_state);
4532 CTX_INIT(create_gs_state);
4533 CTX_INIT(bind_gs_state);
4534 CTX_INIT(delete_gs_state);
4535 CTX_INIT(create_tcs_state);
4536 CTX_INIT(bind_tcs_state);
4537 CTX_INIT(delete_tcs_state);
4538 CTX_INIT(create_tes_state);
4539 CTX_INIT(bind_tes_state);
4540 CTX_INIT(delete_tes_state);
4541 CTX_INIT(create_compute_state);
4542 CTX_INIT(bind_compute_state);
4543 CTX_INIT(delete_compute_state);
4544 CTX_INIT(create_vertex_elements_state);
4545 CTX_INIT(bind_vertex_elements_state);
4546 CTX_INIT(delete_vertex_elements_state);
4547 CTX_INIT(set_blend_color);
4548 CTX_INIT(set_stencil_ref);
4549 CTX_INIT(set_sample_mask);
4550 CTX_INIT(set_min_samples);
4551 CTX_INIT(set_clip_state);
4552 CTX_INIT(set_constant_buffer);
4553 CTX_INIT(set_inlinable_constants);
4554 CTX_INIT(set_framebuffer_state);
4555 CTX_INIT(set_polygon_stipple);
4556 CTX_INIT(set_sample_locations);
4557 CTX_INIT(set_scissor_states);
4558 CTX_INIT(set_viewport_states);
4559 CTX_INIT(set_window_rectangles);
4560 CTX_INIT(set_sampler_views);
4561 CTX_INIT(set_tess_state);
4562 CTX_INIT(set_patch_vertices);
4563 CTX_INIT(set_shader_buffers);
4564 CTX_INIT(set_shader_images);
4565 CTX_INIT(set_vertex_buffers);
4566 CTX_INIT(create_stream_output_target);
4567 CTX_INIT(stream_output_target_destroy);
4568 CTX_INIT(set_stream_output_targets);
4569 CTX_INIT(create_sampler_view);
4570 CTX_INIT(sampler_view_destroy);
4571 CTX_INIT(create_surface);
4572 CTX_INIT(surface_destroy);
4573 CTX_INIT(buffer_map);
4574 CTX_INIT(texture_map);
4575 CTX_INIT(transfer_flush_region);
4576 CTX_INIT(buffer_unmap);
4577 CTX_INIT(texture_unmap);
4578 CTX_INIT(buffer_subdata);
4579 CTX_INIT(texture_subdata);
4580 CTX_INIT(texture_barrier);
4581 CTX_INIT(memory_barrier);
4582 CTX_INIT(resource_commit);
4583 CTX_INIT(create_video_codec);
4584 CTX_INIT(create_video_buffer);
4585 CTX_INIT(set_compute_resources);
4586 CTX_INIT(set_global_binding);
4587 CTX_INIT(get_sample_position);
4588 CTX_INIT(invalidate_resource);
4589 CTX_INIT(get_device_reset_status);
4590 CTX_INIT(set_device_reset_callback);
4591 CTX_INIT(dump_debug_state);
4592 CTX_INIT(set_log_context);
4593 CTX_INIT(emit_string_marker);
4594 CTX_INIT(set_debug_callback);
4595 CTX_INIT(create_fence_fd);
4596 CTX_INIT(fence_server_sync);
4597 CTX_INIT(fence_server_signal);
4598 CTX_INIT(get_timestamp);
4599 CTX_INIT(create_texture_handle);
4600 CTX_INIT(delete_texture_handle);
4601 CTX_INIT(make_texture_handle_resident);
4602 CTX_INIT(create_image_handle);
4603 CTX_INIT(delete_image_handle);
4604 CTX_INIT(make_image_handle_resident);
4605 CTX_INIT(set_frontend_noop);
4606 CTX_INIT(init_intel_perf_query_info);
4607 CTX_INIT(get_intel_perf_query_info);
4608 CTX_INIT(get_intel_perf_query_counter_info);
4609 CTX_INIT(new_intel_perf_query_obj);
4610 CTX_INIT(begin_intel_perf_query);
4611 CTX_INIT(end_intel_perf_query);
4612 CTX_INIT(delete_intel_perf_query);
4613 CTX_INIT(wait_intel_perf_query);
4614 CTX_INIT(is_intel_perf_query_ready);
4615 CTX_INIT(get_intel_perf_query_data);
4616 #undef CTX_INIT
4617
4618 if (out)
4619 *out = tc;
4620
4621 tc_begin_next_buffer_list(tc);
4622 return &tc->base;
4623
4624 fail:
4625 tc_destroy(&tc->base);
4626 return NULL;
4627 }
4628
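/* Creation sketch (illustrative; the mydrv_* names are hypothetical):
 *
 *    struct pipe_context *
 *    mydrv_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct pipe_context *pipe = mydrv_create_hw_context(screen, priv, flags);
 *
 *       return threaded_context_create(pipe,
 *                                      &mydrv_screen(screen)->transfer_pool,
 *                                      mydrv_replace_buffer_storage,
 *                                      NULL, &mydrv_context(pipe)->tc);
 *    }
 *
 * The wrapper may return the unwrapped context (GALLIUM_THREAD=0 or a single
 * CPU), in which case the "out" pointer is not written, and on failure the
 * wrapped context has already been destroyed.
 */
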
4629 void
4630 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4631 {
4632 uint64_t total_ram;
4633 if (os_get_total_physical_memory(&total_ram)) {
4634 tc->bytes_mapped_limit = total_ram / divisor;
4635 if (sizeof(void*) == 4)
4636 tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4637 }
4638 }
4639
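/* Example (illustrative): calling
 * threaded_context_init_bytes_mapped_limit(tc, 4) caps the mapped-memory
 * budget tracked by tc at one quarter of system RAM, further clamped to
 * 512 MB on 32-bit builds; if the total-RAM query fails, no limit is set.
 */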