1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54
55 #define TC_SENTINEL 0x5ca1ab1e
56
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64
65 #ifdef TC_TRACE
66 # define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 # define TC_TRACE_SCOPE(call_id)
69 #endif
70
71 static void
72 tc_buffer_subdata(struct pipe_context *_pipe,
73 struct pipe_resource *resource,
74 unsigned usage, unsigned offset,
75 unsigned size, const void *data);
76
77 static void
78 tc_batch_check(UNUSED struct tc_batch *batch)
79 {
80 tc_assert(batch->sentinel == TC_SENTINEL);
81 tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
82 }
83
84 static void
85 tc_debug_check(struct threaded_context *tc)
86 {
87 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
88 tc_batch_check(&tc->batch_slots[i]);
89 tc_assert(tc->batch_slots[i].tc == tc);
90 }
91 }
92
93 static void
94 tc_set_driver_thread(struct threaded_context *tc)
95 {
96 #ifndef NDEBUG
97 tc->driver_thread = thrd_current();
98 #endif
99 }
100
101 static void
102 tc_clear_driver_thread(struct threaded_context *tc)
103 {
104 #ifndef NDEBUG
105 memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
106 #endif
107 }
108
109 struct tc_batch_rp_info {
110 /* this is what drivers can see */
111 struct tc_renderpass_info info;
112 /* determines whether the info can be "safely" read by drivers or if it may still be in use */
113 struct util_queue_fence ready;
114    /* when a batch is full, the rp info rolls over onto 'next' */
115 struct tc_batch_rp_info *next;
116 /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
117 struct tc_batch_rp_info *prev;
118 };
119
120 static struct tc_batch_rp_info *
121 tc_batch_rp_info(struct tc_renderpass_info *info)
122 {
123 return (struct tc_batch_rp_info *)info;
124 }
125
126 static void
127 tc_sanitize_renderpass_info(struct threaded_context *tc)
128 {
129 tc->renderpass_info_recording->cbuf_invalidate = 0;
130 tc->renderpass_info_recording->zsbuf_invalidate = false;
131 tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
132 if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
133 /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
134 * driver can always detect invalidation
135 */
136 tc->renderpass_info_recording->zsbuf_clear_partial = true;
137 if (tc->num_queries_active)
138 tc->renderpass_info_recording->has_query_ends = true;
139 }
140
141 /* ensure the batch's array of renderpass data is large enough for the current index */
142 static void
143 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
144 {
145 unsigned size = batch->renderpass_infos.capacity;
146 unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
147
148 if (size / sizeof(struct tc_batch_rp_info) > cur_num)
149 return;
150
151 struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
152 unsigned old_idx = batch->renderpass_info_idx - 1;
153 bool redo = tc->renderpass_info_recording &&
154 tc->renderpass_info_recording == &infos[old_idx].info;
155 if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
156 mesa_loge("tc: memory alloc fail!");
157
158 if (size != batch->renderpass_infos.capacity) {
159 /* zero new allocation region */
160 uint8_t *data = batch->renderpass_infos.data;
161 memset(data + size, 0, batch->renderpass_infos.capacity - size);
162 unsigned start = size / sizeof(struct tc_batch_rp_info);
163 unsigned count = (batch->renderpass_infos.capacity - size) /
164 sizeof(struct tc_batch_rp_info);
165 infos = batch->renderpass_infos.data;
166 if (infos->prev)
167 infos->prev->next = infos;
168 for (unsigned i = 0; i < count; i++)
169 util_queue_fence_init(&infos[start + i].ready);
170 /* re-set current recording info on resize */
171 if (redo)
172 tc->renderpass_info_recording = &infos[old_idx].info;
173 }
174 }
175
176 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
177 static void
178 tc_signal_renderpass_info_ready(struct threaded_context *tc)
179 {
180 if (tc->renderpass_info_recording &&
181 !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
182 util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
183 }
184
185 /* increment the current renderpass info struct for recording
186 * 'full_copy' is used for preserving data across non-blocking tc batch flushes
187 */
188 static void
189 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
190 {
191 struct tc_batch *batch = &tc->batch_slots[batch_idx];
192 struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
193
194 if (tc_info[0].next || batch->num_total_slots) {
195 /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
196     * (probably a CTS case)
197 */
198 struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
199 if (!util_queue_fence_is_signalled(&info->ready)) {
200 /* this batch is actively executing and the driver is waiting on the recording fence to signal */
201 /* force all buffer usage to avoid data loss */
202 info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
203 info->info.zsbuf_clear_partial = true;
204 info->info.has_query_ends = tc->num_queries_active > 0;
205 /* ensure threaded_context_get_renderpass_info() won't deadlock */
206 info->next = NULL;
207 util_queue_fence_signal(&info->ready);
208 }
209 /* always wait on the batch to finish since this will otherwise overwrite thread data */
210 util_queue_fence_wait(&batch->fence);
211 }
212 /* increment rp info and initialize it */
213 batch->renderpass_info_idx++;
214 tc_batch_renderpass_infos_resize(tc, batch);
215 tc_info = batch->renderpass_infos.data;
216
217 if (full_copy) {
218 /* this should only be called when changing batches */
219 assert(batch->renderpass_info_idx == 0);
220 /* copy the previous data in its entirety: this is still the same renderpass */
221 if (tc->renderpass_info_recording) {
222 tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
223 tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
224 tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
225 /* guard against deadlock scenario */
226 assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
227 } else {
228 tc_info[batch->renderpass_info_idx].info.data = 0;
229 tc_info[batch->renderpass_info_idx].prev = NULL;
230 }
231 } else {
232 /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
233 tc_info[batch->renderpass_info_idx].info.data = 0;
234 if (tc->renderpass_info_recording) {
235 tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
236 tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
237 tc_info[batch->renderpass_info_idx].prev = NULL;
238 }
239 }
240
241 assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
242 /* signal existing info since it will not be used anymore */
243 tc_signal_renderpass_info_ready(tc);
244 util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
245 /* guard against deadlock scenario */
246 assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
247 /* this is now the current recording renderpass info */
248 tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
249 batch->max_renderpass_info_idx = batch->renderpass_info_idx;
250 }
251
252 static ALWAYS_INLINE struct tc_renderpass_info *
253 tc_get_renderpass_info(struct threaded_context *tc)
254 {
255 return tc->renderpass_info_recording;
256 }
257
258 /* update metadata at draw time */
259 static void
260 tc_parse_draw(struct threaded_context *tc)
261 {
262 struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
263
264 if (info) {
265 /* all buffers that aren't cleared are considered loaded */
266 info->cbuf_load |= ~info->cbuf_clear;
267 if (!info->zsbuf_clear)
268 info->zsbuf_load = true;
269 /* previous invalidates are no longer relevant */
270 info->cbuf_invalidate = 0;
271 info->zsbuf_invalidate = false;
272 info->has_draw = true;
273 info->has_query_ends |= tc->query_ended;
274 }
275
276 tc->in_renderpass = true;
277 tc->seen_fb_state = true;
278 tc->query_ended = false;
279 }
280
281 static void *
282 to_call_check(void *ptr, unsigned num_slots)
283 {
284 #if TC_DEBUG >= 1
285 struct tc_call_base *call = ptr;
286 tc_assert(call->num_slots == num_slots);
287 #endif
288 return ptr;
289 }
290 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
291
292 #define size_to_slots(size) DIV_ROUND_UP(size, 8)
293 #define call_size(type) size_to_slots(sizeof(struct type))
294 #define call_size_with_slots(type, num_slots) size_to_slots( \
295 sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
296 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
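/* Worked example (illustrative only): a hypothetical 20-byte call struct
 * occupies size_to_slots(20) = DIV_ROUND_UP(20, 8) = 3 slots, i.e. 24 bytes
 * in the batch's uint64_t slot array.  call_size_with_slots() does the same
 * rounding after adding sizeof(slot[0]) * num_slots for a trailing
 * variable-length array.
 */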
297
298 ALWAYS_INLINE static void
299 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
300 {
301 /* ignore batch usage when persistent */
302 if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
303 threaded_resource(pres)->last_batch_usage = tc->next;
304 threaded_resource(pres)->batch_generation = tc->batch_generation;
305 }
306
307 ALWAYS_INLINE static void
308 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
309 {
310 if (!pres)
311 return;
312 /* mark with special value to block any unsynchronized access */
313 threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
314 threaded_resource(pres)->batch_generation = tc->batch_generation;
315 }
316
317 /* this can ONLY be used to check against the currently recording batch */
318 ALWAYS_INLINE static bool
319 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
320 {
321 const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
322
323 if (!tc->options.unsynchronized_texture_subdata)
324 return true;
325
326 /* resource has persistent access: assume always busy */
327 if (tbuf->last_batch_usage == INT8_MAX)
328 return true;
329
330 /* resource has never been seen */
331 if (tbuf->last_batch_usage == -1)
332 return false;
333
334 /* resource has been seen but no batches have executed */
335 if (tc->last_completed == -1)
336 return true;
337
338 /* begin comparisons checking number of times batches have cycled */
339 unsigned diff = tc->batch_generation - tbuf->batch_generation;
340 /* resource has been seen, batches have fully cycled at least once */
341 if (diff > 1)
342 return false;
343
344 /* resource has been seen in current batch cycle: return whether batch has definitely completed */
345 if (diff == 0)
346 return tc->last_completed >= tbuf->last_batch_usage;
347
348 /* resource has been seen within one batch cycle: check for batch wrapping */
349 if (tc->last_completed >= tbuf->last_batch_usage)
350 /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
351 return false;
352
353 /* batch execution has not definitely wrapped: resource is definitely not idle */
354 if (tc->last_completed > tc->next)
355 return true;
356
357 /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
358 if (tbuf->last_batch_usage > tc->last_completed)
359 return false;
360
361 /* tc->last_completed is not an exact measurement, so anything else is considered busy */
362 return true;
363 }
364
365 /* Assign src to dst while dst is uninitialized. */
366 static inline void
367 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
368 {
369 *dst = src;
370 pipe_reference(NULL, &src->reference); /* only increment refcount */
371 }
372
373 /* Assign src to dst while dst is uninitialized. */
374 static inline void
375 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
376 struct pipe_vertex_state *src)
377 {
378 *dst = src;
379 pipe_reference(NULL, &src->reference); /* only increment refcount */
380 }
381
382 /* Unreference dst but don't touch the dst pointer. */
383 static inline void
384 tc_drop_resource_reference(struct pipe_resource *dst)
385 {
386 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
387 pipe_resource_destroy(dst);
388 }
389
390 /* Unreference dst but don't touch the dst pointer. */
391 static inline void
392 tc_drop_surface_reference(struct pipe_surface *dst)
393 {
394 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
395 dst->context->surface_destroy(dst->context, dst);
396 }
397
398 /* Unreference dst but don't touch the dst pointer. */
399 static inline void
400 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
401 {
402 if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
403 dst->context->stream_output_target_destroy(dst->context, dst);
404 }
405
406 /**
407 * Subtract the given number of references.
408 */
409 static inline void
410 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
411 {
412 int count = p_atomic_add_return(&dst->reference.count, -num_refs);
413
414 assert(count >= 0);
415 /* Underflows shouldn't happen, but let's be safe. */
416 if (count <= 0)
417 dst->screen->vertex_state_destroy(dst->screen, dst);
418 }
419
420 /* We don't want to read or write min_index and max_index, because
421 * it shouldn't be needed by drivers at this point.
422 */
423 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
424 offsetof(struct pipe_draw_info, min_index)
425
426 ALWAYS_INLINE static struct tc_renderpass_info *
427 incr_rp_info(struct tc_renderpass_info *tc_info)
428 {
429 struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
430 return &info[1].info;
431 }
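/* Note: stepping to info[1] is valid because a batch stores its
 * tc_batch_rp_info entries contiguously in batch->renderpass_infos (see
 * tc_batch_renderpass_infos_resize), so during execution the "next"
 * renderpass info is simply the next array element.
 */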
432
433 ALWAYS_INLINE static void
434 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
435 {
436 /* if the framebuffer state is persisting from a previous batch,
437 * begin incrementing renderpass info on the first set_framebuffer_state call
438 */
439 bool first = !batch->first_set_fb;
440 const tc_execute *execute_func = batch->tc->execute_func;
441
442 for (uint64_t *iter = batch->slots; iter != last;) {
443 struct tc_call_base *call = (struct tc_call_base *)iter;
444
445 tc_assert(call->sentinel == TC_SENTINEL);
446
447 #if TC_DEBUG >= 3
448 tc_printf("CALL: %s", tc_call_names[call->call_id]);
449 #endif
450
451 TC_TRACE_SCOPE(call->call_id);
452
453 iter += execute_func[call->call_id](pipe, call);
454
455 if (parsing) {
456 if (call->call_id == TC_CALL_flush) {
457 /* always increment renderpass info for non-deferred flushes */
458 batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
459 /* if a flush happens, renderpass info is always incremented after */
460 first = false;
461 } else if (call->call_id == TC_CALL_set_framebuffer_state) {
462 /* the renderpass info pointer is already set at the start of the batch,
463 * so don't increment on the first set_framebuffer_state call
464 */
465 if (!first)
466 batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
467 first = false;
468 } else if (call->call_id >= TC_CALL_draw_single &&
469 call->call_id <= TC_CALL_draw_vstate_multi) {
470 /* if a draw happens before a set_framebuffer_state on this batch,
471 * begin incrementing renderpass data
472 */
473 first = false;
474 }
475 }
476 }
477 }
478
479 static void
480 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
481 {
482 struct tc_batch *batch = job;
483 struct pipe_context *pipe = batch->tc->pipe;
484 uint64_t *last = &batch->slots[batch->num_total_slots];
485
486 tc_batch_check(batch);
487 tc_set_driver_thread(batch->tc);
488
489 assert(!batch->token);
490
491 /* setup renderpass info */
492 batch->tc->renderpass_info = batch->renderpass_infos.data;
493
494 if (batch->tc->options.parse_renderpass_info) {
495 batch_execute(batch, pipe, last, true);
496
497 struct tc_batch_rp_info *info = batch->renderpass_infos.data;
498 for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
499 if (info[i].next)
500 info[i].next->prev = NULL;
501 info[i].next = NULL;
502 }
503 } else {
504 batch_execute(batch, pipe, last, false);
505 }
506
507 /* Add the fence to the list of fences for the driver to signal at the next
508 * flush, which we use for tracking which buffers are referenced by
509 * an unflushed command buffer.
510 */
511 struct threaded_context *tc = batch->tc;
512 struct util_queue_fence *fence =
513 &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
514
515 if (tc->options.driver_calls_flush_notify) {
516 tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
517
518 /* Since our buffer lists are chained as a ring, we need to flush
519 * the context twice as we go around the ring to make the driver signal
520 * the buffer list fences, so that the producer thread can reuse the buffer
521 * list structures for the next batches without waiting.
522 */
523 unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
524 if (batch->buffer_list_index % half_ring == half_ring - 1)
525 pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
526 } else {
527 util_queue_fence_signal(fence);
528 }
529
530 tc_clear_driver_thread(batch->tc);
531 tc_batch_check(batch);
532 batch->num_total_slots = 0;
533 batch->last_mergeable_call = NULL;
534 batch->first_set_fb = false;
535 batch->max_renderpass_info_idx = 0;
536 batch->tc->last_completed = batch->batch_idx;
537 }
538
539 static void
540 tc_begin_next_buffer_list(struct threaded_context *tc)
541 {
542 tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
543
544 tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
545
546 /* Clear the buffer list in the new empty batch. */
547 struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
548 assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
549 util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
550 BITSET_ZERO(buf_list->buffer_list);
551
552 tc->add_all_gfx_bindings_to_buffer_list = true;
553 tc->add_all_compute_bindings_to_buffer_list = true;
554 }
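/* The buffer lists form a ring of TC_MAX_BUFFER_LISTS entries; each batch is
 * tied to one list, and driver_flushed_fence is signalled once the driver has
 * flushed (or tc has directly signalled) the work referencing that list.
 * tc_is_buffer_busy() later only scans these bitsets instead of synchronizing
 * with the driver thread.
 */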
555
556 static void
557 tc_add_call_end(struct tc_batch *next)
558 {
559 /* Add a dummy last call that won't be executed, but will indicate the end
560 * of the batch. It's for calls that always look at the next call and this
561     * stops them from looking farther ahead.
562 */
563 assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
564 struct tc_call_base *call =
565 (struct tc_call_base*)&next->slots[next->num_total_slots];
566 call->call_id = TC_NUM_CALLS;
567 call->num_slots = 1;
568 }
569
570 static void
571 tc_batch_flush(struct threaded_context *tc, bool full_copy)
572 {
573 struct tc_batch *next = &tc->batch_slots[tc->next];
574 unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
575
576 tc_assert(next->num_total_slots != 0);
577 tc_add_call_end(next);
578
579 tc_batch_check(next);
580 tc_debug_check(tc);
581 tc->bytes_mapped_estimate = 0;
582 p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
583
584 if (next->token) {
585 next->token->tc = NULL;
586 tc_unflushed_batch_token_reference(&next->token, NULL);
587 }
588 /* reset renderpass info index for subsequent use */
589 next->renderpass_info_idx = -1;
590
591 /* always increment renderpass info on batch flush;
592 * renderpass info can only be accessed by its owner batch during execution
593 */
594 if (tc->renderpass_info_recording) {
595 tc->batch_slots[next_id].first_set_fb = full_copy;
596 tc_batch_increment_renderpass_info(tc, next_id, full_copy);
597 }
598
599 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
600 NULL, 0);
601 tc->last = tc->next;
602 tc->next = next_id;
603 if (next_id == 0)
604 tc->batch_generation++;
605 tc_begin_next_buffer_list(tc);
607 }
608
609 /* This is the function that adds variable-sized calls into the current
610 * batch. It also flushes the batch if there is not enough space there.
611 * All other higher-level "add" functions use it.
612 */
613 static void *
614 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
615 unsigned num_slots)
616 {
617 TC_TRACE_SCOPE(id);
618 struct tc_batch *next = &tc->batch_slots[tc->next];
619 assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
620 tc_debug_check(tc);
621
622 if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
623 /* copy existing renderpass info during flush */
624 tc_batch_flush(tc, true);
625 next = &tc->batch_slots[tc->next];
626 tc_assert(next->num_total_slots == 0);
627 tc_assert(next->last_mergeable_call == NULL);
628 }
629
630 tc_assert(util_queue_fence_is_signalled(&next->fence));
631
632 struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
633 next->num_total_slots += num_slots;
634
635 #if !defined(NDEBUG) && TC_DEBUG >= 1
636 call->sentinel = TC_SENTINEL;
637 #endif
638 call->call_id = id;
639 call->num_slots = num_slots;
640
641 #if TC_DEBUG >= 3
642 tc_printf("ENQUEUE: %s", tc_call_names[id]);
643 #endif
644
645 tc_debug_check(tc);
646 return call;
647 }
648
649 #define tc_add_call(tc, execute, type) \
650 ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
651
652 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
653 ((struct type*)tc_add_sized_call(tc, execute, \
654 call_size_with_slots(type, num_slots)))
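/* Typical usage (tc_destroy_query() below is a real instance):
 *
 *    struct tc_query_call *p =
 *       tc_add_call(tc, TC_CALL_destroy_query, tc_query_call);
 *    p->query = query;
 *
 * i.e. reserve slots for the call struct in the current batch, fill in its
 * payload, and let the driver thread dispatch it later via execute_func[].
 */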
655
656 /* Returns the last mergeable call that was added to the unflushed
657 * batch, or NULL if the address of that call is not currently known
658 * or no such call exists in the unflushed batch.
659 */
660 static struct tc_call_base *
661 tc_get_last_mergeable_call(struct threaded_context *tc)
662 {
663 struct tc_batch *batch = &tc->batch_slots[tc->next];
664 struct tc_call_base *call = batch->last_mergeable_call;
665
666 tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
667
668 if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
669 return call;
670 else
671 return NULL;
672 }
673
674 /* Increases the size of the last call in the unflushed batch to the
675 * given number of slots, if possible, without changing the call's data.
676 */
677 static bool
678 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
679 {
680 struct tc_batch *batch = &tc->batch_slots[tc->next];
681 struct tc_call_base *call = tc_get_last_mergeable_call(tc);
682
683 tc_assert(call);
684 tc_assert(desired_num_slots >= call->num_slots);
685
686 unsigned added_slots = desired_num_slots - call->num_slots;
687
688 if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
689 return false;
690
691 batch->num_total_slots += added_slots;
692 call->num_slots += added_slots;
693
694 return true;
695 }
696
697 static void
698 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
699 {
700 struct tc_batch *batch = &tc->batch_slots[tc->next];
701 tc_assert(call->num_slots <= batch->num_total_slots);
702 tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
703 batch->last_mergeable_call = call;
704 }
705
706 static bool
707 tc_is_sync(struct threaded_context *tc)
708 {
709 struct tc_batch *last = &tc->batch_slots[tc->last];
710 struct tc_batch *next = &tc->batch_slots[tc->next];
711
712 return util_queue_fence_is_signalled(&last->fence) &&
713 !next->num_total_slots;
714 }
715
716 static void
717 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
718 {
719 struct tc_batch *last = &tc->batch_slots[tc->last];
720 struct tc_batch *next = &tc->batch_slots[tc->next];
721 bool synced = false;
722
723 MESA_TRACE_SCOPE(func);
724
725 tc_debug_check(tc);
726
727 if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
728 /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
729 * then the current renderpass info MUST be signaled to avoid deadlocking the driver
730 *
731 * this is not a "complete" signal operation, however, as it's unknown what calls may
732 * come after this one, which means that framebuffer attachment data is unreliable
733 *
734 * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
735 * force all attachments active and assume the app was going to get bad perf here anyway
736 */
737 tc_sanitize_renderpass_info(tc);
738 }
739 tc_signal_renderpass_info_ready(tc);
740
741 /* Only wait for queued calls... */
742 if (!util_queue_fence_is_signalled(&last->fence)) {
743 util_queue_fence_wait(&last->fence);
744 synced = true;
745 }
746
747 tc_debug_check(tc);
748
749 if (next->token) {
750 next->token->tc = NULL;
751 tc_unflushed_batch_token_reference(&next->token, NULL);
752 }
753
754 /* .. and execute unflushed calls directly. */
755 if (next->num_total_slots) {
756 p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
757 tc->bytes_mapped_estimate = 0;
758 tc_add_call_end(next);
759 tc_batch_execute(next, NULL, 0);
760 tc_begin_next_buffer_list(tc);
761 synced = true;
762 }
763
764 if (synced) {
765 p_atomic_inc(&tc->num_syncs);
766
767 if (tc_strcmp(func, "tc_destroy") != 0) {
768 tc_printf("sync %s %s", func, info);
769 }
770 }
771
772 tc_debug_check(tc);
773
774 if (tc->options.parse_renderpass_info) {
775 int renderpass_info_idx = next->renderpass_info_idx;
776 if (renderpass_info_idx > 0) {
777 /* don't reset if fb state is unflushed */
778 bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
779 uint32_t fb_info = tc->renderpass_info_recording->data32[0];
780 next->renderpass_info_idx = -1;
781 tc_batch_increment_renderpass_info(tc, tc->next, false);
782 if (fb_no_draw)
783 tc->renderpass_info_recording->data32[0] = fb_info;
784 } else if (tc->renderpass_info_recording->has_draw) {
785 tc->renderpass_info_recording->data32[0] = 0;
786 }
787 tc->seen_fb_state = false;
788 tc->query_ended = false;
789 }
790 }
791
792 #define tc_sync(tc) _tc_sync(tc, "", __func__)
793 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
794
795 /**
796 * Call this from fence_finish for same-context fence waits of deferred fences
797 * that haven't been flushed yet.
798 *
799 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
800 * i.e., the wrapped one.
801 */
802 void
803 threaded_context_flush(struct pipe_context *_pipe,
804 struct tc_unflushed_batch_token *token,
805 bool prefer_async)
806 {
807 struct threaded_context *tc = threaded_context(_pipe);
808
809 /* This is called from the gallium frontend / application thread. */
810 if (token->tc && token->tc == tc) {
811 struct tc_batch *last = &tc->batch_slots[tc->last];
812
813 /* Prefer to do the flush in the driver thread if it is already
814 * running. That should be better for cache locality.
815 */
816 if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
817 tc_batch_flush(tc, false);
818 else
819 tc_sync(token->tc);
820 }
821 }
822
823 static void
824 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
825 {
826 uint32_t id = threaded_resource(buf)->buffer_id_unique;
827 BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
828 }
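/* Buffer ids are hashed into the bitset via TC_BUFFER_ID_MASK, so two buffers
 * may share a bit.  A collision only makes an idle buffer look referenced
 * (e.g. in tc_is_buffer_busy()), which is conservative but safe.
 */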
829
830 /* Reset a range of buffer binding slots. */
831 static void
832 tc_unbind_buffers(uint32_t *binding, unsigned count)
833 {
834 if (count)
835 memset(binding, 0, sizeof(*binding) * count);
836 }
837
838 static void
839 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
840 unsigned count)
841 {
842 for (unsigned i = 0; i < count; i++) {
843 if (bindings[i])
844 BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
845 }
846 }
847
848 static bool
849 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
850 unsigned count)
851 {
852 unsigned rebind_count = 0;
853
854 for (unsigned i = 0; i < count; i++) {
855 if (bindings[i] == old_id) {
856 bindings[i] = new_id;
857 rebind_count++;
858 }
859 }
860 return rebind_count;
861 }
862
863 static void
864 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
865 BITSET_WORD *buffer_list,
866 enum pipe_shader_type shader)
867 {
868 tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
869 tc->max_const_buffers);
870 if (tc->seen_shader_buffers[shader]) {
871 tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
872 tc->max_shader_buffers);
873 }
874 if (tc->seen_image_buffers[shader]) {
875 tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
876 tc->max_images);
877 }
878 if (tc->seen_sampler_buffers[shader]) {
879 tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
880 tc->max_samplers);
881 }
882 }
883
884 static unsigned
885 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
886 uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
887 {
888 unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
889
890 ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
891 tc->max_const_buffers);
892 if (ubo)
893 *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
894 if (tc->seen_shader_buffers[shader]) {
895 ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
896 tc->max_shader_buffers);
897 if (ssbo)
898 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
899 }
900 if (tc->seen_image_buffers[shader]) {
901 img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
902 tc->max_images);
903 if (img)
904 *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
905 }
906 if (tc->seen_sampler_buffers[shader]) {
907 sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
908 tc->max_samplers);
909 if (sampler)
910 *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
911 }
912 return ubo + ssbo + img + sampler;
913 }
914
915 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
916 * This is called by the first draw call in a batch when we want to inherit
917 * all bindings set by the previous batch.
918 */
919 static void
920 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
921 {
922 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
923
924 tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
925 if (tc->seen_streamout_buffers)
926 tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
927
928 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
929 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
930
931 if (tc->seen_tcs)
932 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
933 if (tc->seen_tes)
934 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
935 if (tc->seen_gs)
936 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
937
938 tc->add_all_gfx_bindings_to_buffer_list = false;
939 }
940
941 /* Add all bound buffers used by compute to the buffer list.
942 * This is called by the first compute call in a batch when we want to inherit
943 * all bindings set by the previous batch.
944 */
945 static void
946 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
947 {
948 BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
949
950 tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
951 tc->add_all_compute_bindings_to_buffer_list = false;
952 }
953
954 static unsigned
955 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
956 {
957 unsigned vbo = 0, so = 0;
958
959 vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
960 tc->num_vertex_buffers);
961 if (vbo)
962 *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
963
964 if (tc->seen_streamout_buffers) {
965 so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
966 PIPE_MAX_SO_BUFFERS);
967 if (so)
968 *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
969 }
970 unsigned rebound = vbo + so;
971
972 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
973 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
974
975 if (tc->seen_tcs)
976 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
977 if (tc->seen_tes)
978 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
979 if (tc->seen_gs)
980 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
981
982 rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
983
984 if (rebound)
985 BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
986 return rebound;
987 }
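/* tc_rebind_buffer() is meant for the case where a buffer's storage is
 * replaced (e.g. on invalidation) and its unique id changes: every binding
 * table still holding old_id is rewritten to new_id, and the affected binding
 * types are reported through *rebind_mask so the caller can dirty them.
 */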
988
989 static bool
990 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
991 {
992 while (binding_mask) {
993 if (bindings[u_bit_scan(&binding_mask)] == id)
994 return true;
995 }
996 return false;
997 }
998
999 static bool
1000 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
1001 enum pipe_shader_type shader)
1002 {
1003 if (tc->seen_shader_buffers[shader] &&
1004 tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
1005 tc->shader_buffers_writeable_mask[shader]))
1006 return true;
1007
1008 if (tc->seen_image_buffers[shader] &&
1009 tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
1010 tc->image_buffers_writeable_mask[shader]))
1011 return true;
1012
1013 return false;
1014 }
1015
1016 static bool
1017 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
1018 {
1019 if (tc->seen_streamout_buffers &&
1020 tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
1021 BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
1022 return true;
1023
1024 if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
1025 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
1026 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
1027 return true;
1028
1029 if (tc->seen_tcs &&
1030 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
1031 return true;
1032
1033 if (tc->seen_tes &&
1034 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
1035 return true;
1036
1037 if (tc->seen_gs &&
1038 tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
1039 return true;
1040
1041 return false;
1042 }
1043
1044 static bool
1045 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
1046 unsigned map_usage)
1047 {
1048 if (!tc->options.is_resource_busy)
1049 return true;
1050
1051 uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
1052
1053 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
1054 struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
1055
1056 /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
1057 * then the buffer is considered busy. */
1058 if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
1059 BITSET_TEST(buf_list->buffer_list, id_hash))
1060 return true;
1061 }
1062
1063    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
1064 * this buffer is busy or not. */
1065 return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
1066 }
1067
1068 /**
1069 * allow_cpu_storage should be false for user memory and imported buffers.
1070 */
1071 void
1072 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
1073 {
1074 struct threaded_resource *tres = threaded_resource(res);
1075
1076 tres->latest = &tres->b;
1077 tres->cpu_storage = NULL;
1078 util_range_init(&tres->valid_buffer_range);
1079 tres->is_shared = false;
1080 tres->is_user_ptr = false;
1081 tres->buffer_id_unique = 0;
1082 tres->pending_staging_uploads = 0;
1083 tres->last_batch_usage = -1;
1084 util_range_init(&tres->pending_staging_uploads_range);
1085
1086 if (allow_cpu_storage &&
1087 !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
1088 PIPE_RESOURCE_FLAG_SPARSE |
1089 PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1090 /* We need buffer invalidation and buffer busyness tracking for the CPU
1091 * storage, which aren't supported with pipe_vertex_state. */
1092 !(res->bind & PIPE_BIND_VERTEX_STATE))
1093 tres->allow_cpu_storage = true;
1094 else
1095 tres->allow_cpu_storage = false;
1096 }
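/* Illustrative sketch (hypothetical driver code, not part of this file):
 * a driver that embeds struct threaded_resource in its buffer type would
 * typically call this from its resource_create hook, e.g.
 *
 *    struct xyz_resource *res = xyz_alloc_resource(screen, templ);
 *    // user memory and imported buffers must pass allow_cpu_storage=false
 *    threaded_resource_init(&res->b.b, !res->is_user_ptr && !res->is_shared);
 *    return &res->b.b;
 *
 * where the xyz_* names and fields are placeholders.
 */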
1097
1098 void
1099 threaded_resource_deinit(struct pipe_resource *res)
1100 {
1101 struct threaded_resource *tres = threaded_resource(res);
1102
1103 if (tres->latest != &tres->b)
1104 pipe_resource_reference(&tres->latest, NULL);
1105 util_range_destroy(&tres->valid_buffer_range);
1106 util_range_destroy(&tres->pending_staging_uploads_range);
1107 align_free(tres->cpu_storage);
1108 }
1109
1110 struct pipe_context *
1111 threaded_context_unwrap_sync(struct pipe_context *pipe)
1112 {
1113 if (!pipe || !pipe->priv)
1114 return pipe;
1115
1116 tc_sync(threaded_context(pipe));
1117 return (struct pipe_context*)pipe->priv;
1118 }
1119
1120
1121 /********************************************************************
1122 * simple functions
1123 */
1124
1125 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1126 struct tc_call_##func { \
1127 struct tc_call_base base; \
1128 type state; \
1129 }; \
1130 \
1131 static uint16_t \
1132 tc_call_##func(struct pipe_context *pipe, void *call) \
1133 { \
1134 pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1135 return call_size(tc_call_##func); \
1136 } \
1137 \
1138 static void \
1139 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1140 { \
1141 struct threaded_context *tc = threaded_context(_pipe); \
1142 struct tc_call_##func *p = (struct tc_call_##func*) \
1143 tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1144 p->state = deref(param); \
1145 __VA_ARGS__; \
1146 }
1147
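/* For reference, TC_FUNC1(set_sample_mask, , unsigned, , ) below roughly
 * expands to (modulo whitespace):
 *
 *    struct tc_call_set_sample_mask {
 *       struct tc_call_base base;
 *       unsigned state;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_sample_mask(pipe,
 *                             (to_call(call, tc_call_set_sample_mask)->state));
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask*)
 *          tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = (param);
 *    }
 */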
1148 TC_FUNC1(set_active_query_state, , bool, , )
1149
1150 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1151 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1152 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1153 TC_FUNC1(set_sample_mask, , unsigned, , )
1154 TC_FUNC1(set_min_samples, , unsigned, , )
1155 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1156
1157 TC_FUNC1(texture_barrier, , unsigned, , )
1158 TC_FUNC1(memory_barrier, , unsigned, , )
1159 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1160 TC_FUNC1(delete_image_handle, , uint64_t, , )
1161 TC_FUNC1(set_frontend_noop, , bool, , )
1162
1163
1164 /********************************************************************
1165 * queries
1166 */
1167
1168 static struct pipe_query *
1169 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1170 unsigned index)
1171 {
1172 struct threaded_context *tc = threaded_context(_pipe);
1173 struct pipe_context *pipe = tc->pipe;
1174
1175 return pipe->create_query(pipe, query_type, index);
1176 }
1177
1178 static struct pipe_query *
1179 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1180 unsigned *query_types)
1181 {
1182 struct threaded_context *tc = threaded_context(_pipe);
1183 struct pipe_context *pipe = tc->pipe;
1184
1185 return pipe->create_batch_query(pipe, num_queries, query_types);
1186 }
1187
1188 struct tc_query_call {
1189 struct tc_call_base base;
1190 struct pipe_query *query;
1191 };
1192
1193 static uint16_t
1194 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1195 {
1196 struct pipe_query *query = to_call(call, tc_query_call)->query;
1197 struct threaded_query *tq = threaded_query(query);
1198
1199 if (list_is_linked(&tq->head_unflushed))
1200 list_del(&tq->head_unflushed);
1201
1202 pipe->destroy_query(pipe, query);
1203 return call_size(tc_query_call);
1204 }
1205
1206 static void
1207 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1208 {
1209 struct threaded_context *tc = threaded_context(_pipe);
1210
1211 tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1212 }
1213
1214 static uint16_t
1215 tc_call_begin_query(struct pipe_context *pipe, void *call)
1216 {
1217 pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1218 return call_size(tc_query_call);
1219 }
1220
1221 static bool
1222 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1223 {
1224 struct threaded_context *tc = threaded_context(_pipe);
1225 tc->num_queries_active++;
1226
1227 tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1228 return true; /* we don't care about the return value for this call */
1229 }
1230
1231 struct tc_end_query_call {
1232 struct tc_call_base base;
1233 struct threaded_context *tc;
1234 struct pipe_query *query;
1235 };
1236
1237 static uint16_t
1238 tc_call_end_query(struct pipe_context *pipe, void *call)
1239 {
1240 struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1241 struct threaded_query *tq = threaded_query(p->query);
1242
1243 if (!list_is_linked(&tq->head_unflushed))
1244 list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1245
1246 pipe->end_query(pipe, p->query);
1247 return call_size(tc_end_query_call);
1248 }
1249
1250 static bool
1251 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1252 {
1253 struct threaded_context *tc = threaded_context(_pipe);
1254 struct threaded_query *tq = threaded_query(query);
1255 struct tc_end_query_call *call =
1256 tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1257 tc->num_queries_active--;
1258
1259 call->tc = tc;
1260 call->query = query;
1261
1262 tq->flushed = false;
1263 tc->query_ended = true;
1264
1265 return true; /* we don't care about the return value for this call */
1266 }
1267
1268 static bool
1269 tc_get_query_result(struct pipe_context *_pipe,
1270 struct pipe_query *query, bool wait,
1271 union pipe_query_result *result)
1272 {
1273 struct threaded_context *tc = threaded_context(_pipe);
1274 struct threaded_query *tq = threaded_query(query);
1275 struct pipe_context *pipe = tc->pipe;
1276 bool flushed = tq->flushed;
1277
1278 if (!flushed) {
1279 tc_sync_msg(tc, wait ? "wait" : "nowait");
1280 tc_set_driver_thread(tc);
1281 }
1282
1283 bool success = pipe->get_query_result(pipe, query, wait, result);
1284
1285 if (!flushed)
1286 tc_clear_driver_thread(tc);
1287
1288 if (success) {
1289 tq->flushed = true;
1290 if (list_is_linked(&tq->head_unflushed)) {
1291 /* This is safe because it can only happen after we sync'd. */
1292 list_del(&tq->head_unflushed);
1293 }
1294 }
1295 return success;
1296 }
1297
1298 struct tc_query_result_resource {
1299 struct tc_call_base base;
1300 enum pipe_query_flags flags:8;
1301 enum pipe_query_value_type result_type:8;
1302 int8_t index; /* it can be -1 */
1303 unsigned offset;
1304 struct pipe_query *query;
1305 struct pipe_resource *resource;
1306 };
1307
1308 static uint16_t
1309 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1310 {
1311 struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1312
1313 pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1314 p->index, p->resource, p->offset);
1315 tc_drop_resource_reference(p->resource);
1316 return call_size(tc_query_result_resource);
1317 }
1318
1319 static void
1320 tc_get_query_result_resource(struct pipe_context *_pipe,
1321 struct pipe_query *query,
1322 enum pipe_query_flags flags,
1323 enum pipe_query_value_type result_type, int index,
1324 struct pipe_resource *resource, unsigned offset)
1325 {
1326 struct threaded_context *tc = threaded_context(_pipe);
1327
1328 tc_buffer_disable_cpu_storage(resource);
1329
1330 struct tc_query_result_resource *p =
1331 tc_add_call(tc, TC_CALL_get_query_result_resource,
1332 tc_query_result_resource);
1333 p->query = query;
1334 p->flags = flags;
1335 p->result_type = result_type;
1336 p->index = index;
1337 tc_set_resource_reference(&p->resource, resource);
1338 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1339 p->offset = offset;
1340 }
1341
1342 struct tc_render_condition {
1343 struct tc_call_base base;
1344 bool condition;
1345 unsigned mode;
1346 struct pipe_query *query;
1347 };
1348
1349 static uint16_t
1350 tc_call_render_condition(struct pipe_context *pipe, void *call)
1351 {
1352 struct tc_render_condition *p = to_call(call, tc_render_condition);
1353 pipe->render_condition(pipe, p->query, p->condition, p->mode);
1354 return call_size(tc_render_condition);
1355 }
1356
1357 static void
1358 tc_render_condition(struct pipe_context *_pipe,
1359 struct pipe_query *query, bool condition,
1360 enum pipe_render_cond_flag mode)
1361 {
1362 struct threaded_context *tc = threaded_context(_pipe);
1363 struct tc_render_condition *p =
1364 tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1365
1366 p->query = query;
1367 p->condition = condition;
1368 p->mode = mode;
1369 }
1370
1371
1372 /********************************************************************
1373 * constant (immutable) states
1374 */
1375
1376 #define TC_CSO_CREATE(name, sname) \
1377 static void * \
1378 tc_create_##name##_state(struct pipe_context *_pipe, \
1379 const struct pipe_##sname##_state *state) \
1380 { \
1381 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1382 return pipe->create_##name##_state(pipe, state); \
1383 }
1384
1385 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1386 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1387
1388 #define TC_CSO(name, sname, ...) \
1389 TC_CSO_CREATE(name, sname) \
1390 TC_CSO_BIND(name, ##__VA_ARGS__) \
1391 TC_CSO_DELETE(name)
1392
1393 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1394 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1395 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1396
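/* e.g. TC_CSO_WHOLE(blend) below generates tc_create_blend_state() (which
 * calls the driver synchronously, since CSO creation must return a pointer),
 * plus queued tc_bind_blend_state() and tc_delete_blend_state() built on
 * TC_FUNC1.  The *_TRACK variant additionally records that the shader stage
 * has been seen (tc->seen_gs etc.) for buffer-list inheritance.
 */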
1397 TC_CSO_WHOLE(blend)
1398 TC_CSO_WHOLE(rasterizer)
1399 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1400 TC_CSO_BIND(depth_stencil_alpha,
1401 if (param && tc->options.parse_renderpass_info) {
1402 /* dsa info is only ever added during a renderpass;
1403 * changes outside of a renderpass reset the data
1404 */
1405 if (!tc->in_renderpass) {
1406 tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1407 tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1408 }
1409 /* let the driver parse its own state */
1410 tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1411 }
1412 )
1413 TC_CSO_DELETE(depth_stencil_alpha)
1414 TC_CSO_WHOLE(compute)
1415 TC_CSO_CREATE(fs, shader)
1416 TC_CSO_BIND(fs,
1417 if (param && tc->options.parse_renderpass_info) {
1418 /* fs info is only ever added during a renderpass;
1419 * changes outside of a renderpass reset the data
1420 */
1421 if (!tc->in_renderpass) {
1422 tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1423 tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1424 }
1425 /* let the driver parse its own state */
1426 tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1427 }
1428 )
1429 TC_CSO_DELETE(fs)
1430 TC_CSO_SHADER(vs)
1431 TC_CSO_SHADER_TRACK(gs)
1432 TC_CSO_SHADER_TRACK(tcs)
1433 TC_CSO_SHADER_TRACK(tes)
1434 TC_CSO_CREATE(sampler, sampler)
1435 TC_CSO_DELETE(sampler)
1436 TC_CSO_BIND(vertex_elements)
1437 TC_CSO_DELETE(vertex_elements)
1438
1439 static void *
1440 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1441 const struct pipe_vertex_element *elems)
1442 {
1443 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1444
1445 return pipe->create_vertex_elements_state(pipe, count, elems);
1446 }
1447
1448 struct tc_sampler_states {
1449 struct tc_call_base base;
1450 uint8_t shader, start, count;
1451 void *slot[0]; /* more will be allocated if needed */
1452 };
1453
1454 static uint16_t
1455 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1456 {
1457 struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1458
1459 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1460 return p->base.num_slots;
1461 }
1462
1463 static void
1464 tc_bind_sampler_states(struct pipe_context *_pipe,
1465 enum pipe_shader_type shader,
1466 unsigned start, unsigned count, void **states)
1467 {
1468 if (!count)
1469 return;
1470
1471 struct threaded_context *tc = threaded_context(_pipe);
1472 struct tc_sampler_states *p =
1473 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1474
1475 p->shader = shader;
1476 p->start = start;
1477 p->count = count;
1478 memcpy(p->slot, states, count * sizeof(states[0]));
1479 }
1480
1481 static void
1482 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1483 {
1484 struct threaded_context *tc = threaded_context(_pipe);
1485 tc->pipe->link_shader(tc->pipe, shaders);
1486 }
1487 /********************************************************************
1488 * immediate states
1489 */
1490
1491 struct tc_framebuffer {
1492 struct tc_call_base base;
1493 struct pipe_framebuffer_state state;
1494 };
1495
1496 static uint16_t
1497 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1498 {
1499 struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1500
1501 pipe->set_framebuffer_state(pipe, p);
1502
1503 unsigned nr_cbufs = p->nr_cbufs;
1504 for (unsigned i = 0; i < nr_cbufs; i++)
1505 tc_drop_surface_reference(p->cbufs[i]);
1506 tc_drop_surface_reference(p->zsbuf);
1507 tc_drop_resource_reference(p->resolve);
1508 return call_size(tc_framebuffer);
1509 }
1510
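/* Framebuffer state is deep-copied into the queue (surface references are
 * taken here and dropped by the executor above). When
 * options.parse_renderpass_info is set, tc_set_framebuffer_state below also
 * does the renderpass-info bookkeeping: it tracks the bound attachment
 * resources in tc->fb_resources/fb_resolve and starts a new renderpass-info
 * record whenever a framebuffer change ends the previous renderpass.
 */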
1511 static void
1512 tc_set_framebuffer_state(struct pipe_context *_pipe,
1513 const struct pipe_framebuffer_state *fb)
1514 {
1515 struct threaded_context *tc = threaded_context(_pipe);
1516 struct tc_framebuffer *p =
1517 tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1518 unsigned nr_cbufs = fb->nr_cbufs;
1519
1520 p->state.width = fb->width;
1521 p->state.height = fb->height;
1522 p->state.samples = fb->samples;
1523 p->state.layers = fb->layers;
1524 p->state.nr_cbufs = nr_cbufs;
1525
1526 /* when unbinding, mark attachments as used for the current batch */
1527 for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1528 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1529 pipe_resource_reference(&tc->fb_resources[i], NULL);
1530 }
1531 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1532 tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1533
1534 for (unsigned i = 0; i < nr_cbufs; i++) {
1535 p->state.cbufs[i] = NULL;
1536 pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1537 /* full tracking requires storing the fb attachment resources */
1538 if (fb->cbufs[i])
1539 pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1540 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1541 }
1542 tc->nr_cbufs = nr_cbufs;
1543 if (tc->options.parse_renderpass_info) {
1544 /* ensure this is treated as the first fb set if no fb activity has occurred */
1545 if (!tc->renderpass_info_recording->has_draw &&
1546 !tc->renderpass_info_recording->cbuf_clear &&
1547 !tc->renderpass_info_recording->cbuf_load &&
1548 !tc->renderpass_info_recording->zsbuf_load &&
1549 !tc->renderpass_info_recording->zsbuf_clear_partial)
1550 tc->batch_slots[tc->next].first_set_fb = false;
1551 /* store existing zsbuf data for possible persistence */
1552 uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1553 0 :
1554 tc->renderpass_info_recording->data8[3];
1555 bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1556 (fb->zsbuf ? fb->zsbuf->texture : NULL);
1557
1558 if (tc->seen_fb_state) {
1559 /* this is the end of a renderpass, so increment the renderpass info */
1560 tc_batch_increment_renderpass_info(tc, tc->next, false);
1561 /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1562 * keep zsbuf usage data
1563 */
1564 if (!zsbuf_changed)
1565 tc->renderpass_info_recording->data8[3] = zsbuf;
1566 } else {
1567 /* this is the first time a set_framebuffer_state call is triggered;
1568 * just increment the index and keep using the existing info for recording
1569 */
1570 tc->batch_slots[tc->next].renderpass_info_idx = 0;
1571 }
1572 /* future fb state changes will increment the index */
1573 tc->seen_fb_state = true;
1574 }
1575 pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1576 fb->zsbuf ? fb->zsbuf->texture : NULL);
1577 pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1578 tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1579 tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1580 tc->in_renderpass = false;
1581 p->state.zsbuf = NULL;
1582 pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1583 p->state.resolve = NULL;
1584 pipe_resource_reference(&p->state.resolve, fb->resolve);
1585 }
1586
1587 struct tc_tess_state {
1588 struct tc_call_base base;
1589 float state[6];
1590 };
1591
1592 static uint16_t
1593 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1594 {
1595 float *p = to_call(call, tc_tess_state)->state;
1596
1597 pipe->set_tess_state(pipe, p, p + 4);
1598 return call_size(tc_tess_state);
1599 }
1600
1601 static void
1602 tc_set_tess_state(struct pipe_context *_pipe,
1603 const float default_outer_level[4],
1604 const float default_inner_level[2])
1605 {
1606 struct threaded_context *tc = threaded_context(_pipe);
1607 float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1608
1609 memcpy(p, default_outer_level, 4 * sizeof(float));
1610 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1611 }
1612
1613 struct tc_patch_vertices {
1614 struct tc_call_base base;
1615 uint8_t patch_vertices;
1616 };
1617
1618 static uint16_t
1619 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1620 {
1621 uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1622
1623 pipe->set_patch_vertices(pipe, patch_vertices);
1624 return call_size(tc_patch_vertices);
1625 }
1626
1627 static void
1628 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1629 {
1630 struct threaded_context *tc = threaded_context(_pipe);
1631
1632 tc_add_call(tc, TC_CALL_set_patch_vertices,
1633 tc_patch_vertices)->patch_vertices = patch_vertices;
1634 }
1635
1636 struct tc_constant_buffer_base {
1637 struct tc_call_base base;
1638 uint8_t shader, index;
1639 bool is_null;
1640 };
1641
1642 struct tc_constant_buffer {
1643 struct tc_constant_buffer_base base;
1644 struct pipe_constant_buffer cb;
1645 };
1646
1647 static uint16_t
1648 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1649 {
1650 struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1651
1652 if (unlikely(p->base.is_null)) {
1653 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1654 return call_size(tc_constant_buffer_base);
1655 }
1656
1657 pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1658 return call_size(tc_constant_buffer);
1659 }
1660
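/* Constant buffers backed by a user pointer are uploaded to
 * tc->base.const_uploader on the application thread first, so the driver
 * thread only ever sees a real pipe_resource. NULL/empty buffers are queued
 * as the smaller tc_constant_buffer_base call with is_null = true.
 */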
1661 static void
1662 tc_set_constant_buffer(struct pipe_context *_pipe,
1663 enum pipe_shader_type shader, uint index,
1664 bool take_ownership,
1665 const struct pipe_constant_buffer *cb)
1666 {
1667 struct threaded_context *tc = threaded_context(_pipe);
1668
1669 if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1670 struct tc_constant_buffer_base *p =
1671 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1672 p->shader = shader;
1673 p->index = index;
1674 p->is_null = true;
1675 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1676 return;
1677 }
1678
1679 struct pipe_resource *buffer;
1680 unsigned offset;
1681
1682 if (cb->user_buffer) {
1683 /* The upload must be done before the set_constant_buffer call is added,
1684 * because it can generate e.g. transfer_unmap and flush a partially
1685 * uninitialized set_constant_buffer call to the driver otherwise.
1686 */
1687 buffer = NULL;
1688 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1689 tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1690 u_upload_unmap(tc->base.const_uploader);
1691 take_ownership = true;
1692 } else {
1693 buffer = cb->buffer;
1694 offset = cb->buffer_offset;
1695 }
1696
1697 struct tc_constant_buffer *p =
1698 tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1699 p->base.shader = shader;
1700 p->base.index = index;
1701 p->base.is_null = false;
1702 p->cb.user_buffer = NULL;
1703 p->cb.buffer_offset = offset;
1704 p->cb.buffer_size = cb->buffer_size;
1705
1706 if (take_ownership)
1707 p->cb.buffer = buffer;
1708 else
1709 tc_set_resource_reference(&p->cb.buffer, buffer);
1710
1711 if (buffer) {
1712 tc_bind_buffer(&tc->const_buffers[shader][index],
1713 &tc->buffer_lists[tc->next_buf_list], buffer);
1714 } else {
1715 tc_unbind_buffer(&tc->const_buffers[shader][index]);
1716 }
1717 }
1718
1719 struct tc_inlinable_constants {
1720 struct tc_call_base base;
1721 uint8_t shader;
1722 uint8_t num_values;
1723 uint32_t values[MAX_INLINABLE_UNIFORMS];
1724 };
1725
1726 static uint16_t
1727 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1728 {
1729 struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1730
1731 pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1732 return call_size(tc_inlinable_constants);
1733 }
1734
1735 static void
1736 tc_set_inlinable_constants(struct pipe_context *_pipe,
1737 enum pipe_shader_type shader,
1738 uint num_values, uint32_t *values)
1739 {
1740 struct threaded_context *tc = threaded_context(_pipe);
1741 struct tc_inlinable_constants *p =
1742 tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1743 p->shader = shader;
1744 p->num_values = num_values;
1745 memcpy(p->values, values, num_values * 4);
1746 }
1747
1748 struct tc_sample_locations {
1749 struct tc_call_base base;
1750 uint16_t size;
1751 uint8_t slot[0];
1752 };
1753
1754
1755 static uint16_t
1756 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1757 {
1758 struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1759
1760 pipe->set_sample_locations(pipe, p->size, p->slot);
1761 return p->base.num_slots;
1762 }
1763
1764 static void
1765 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1766 {
1767 struct threaded_context *tc = threaded_context(_pipe);
1768 struct tc_sample_locations *p =
1769 tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1770 tc_sample_locations, size);
1771
1772 p->size = size;
1773 memcpy(p->slot, locations, size);
1774 }
1775
1776 struct tc_scissors {
1777 struct tc_call_base base;
1778 uint8_t start, count;
1779 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1780 };
1781
1782 static uint16_t
1783 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1784 {
1785 struct tc_scissors *p = (struct tc_scissors *)call;
1786
1787 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1788 return p->base.num_slots;
1789 }
1790
1791 static void
1792 tc_set_scissor_states(struct pipe_context *_pipe,
1793 unsigned start, unsigned count,
1794 const struct pipe_scissor_state *states)
1795 {
1796 struct threaded_context *tc = threaded_context(_pipe);
1797 struct tc_scissors *p =
1798 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1799
1800 p->start = start;
1801 p->count = count;
1802 memcpy(&p->slot, states, count * sizeof(states[0]));
1803 }
1804
1805 struct tc_viewports {
1806 struct tc_call_base base;
1807 uint8_t start, count;
1808 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1809 };
1810
1811 static uint16_t
1812 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1813 {
1814 struct tc_viewports *p = (struct tc_viewports *)call;
1815
1816 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1817 return p->base.num_slots;
1818 }
1819
1820 static void
1821 tc_set_viewport_states(struct pipe_context *_pipe,
1822 unsigned start, unsigned count,
1823 const struct pipe_viewport_state *states)
1824 {
1825 if (!count)
1826 return;
1827
1828 struct threaded_context *tc = threaded_context(_pipe);
1829 struct tc_viewports *p =
1830 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1831
1832 p->start = start;
1833 p->count = count;
1834 memcpy(&p->slot, states, count * sizeof(states[0]));
1835 }
1836
1837 struct tc_window_rects {
1838 struct tc_call_base base;
1839 bool include;
1840 uint8_t count;
1841 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1842 };
1843
1844 static uint16_t
1845 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1846 {
1847 struct tc_window_rects *p = (struct tc_window_rects *)call;
1848
1849 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1850 return p->base.num_slots;
1851 }
1852
1853 static void
1854 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1855 unsigned count,
1856 const struct pipe_scissor_state *rects)
1857 {
1858 struct threaded_context *tc = threaded_context(_pipe);
1859 struct tc_window_rects *p =
1860 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1861
1862 p->include = include;
1863 p->count = count;
1864 memcpy(p->slot, rects, count * sizeof(rects[0]));
1865 }
1866
1867 struct tc_sampler_views {
1868 struct tc_call_base base;
1869 uint8_t shader, start, count, unbind_num_trailing_slots;
1870 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1871 };
1872
1873 static uint16_t
1874 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1875 {
1876 struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1877
1878 pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1879 p->unbind_num_trailing_slots, true, p->slot);
1880 return p->base.num_slots;
1881 }
1882
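/* Besides queuing the bind, buffer-backed sampler views are also registered
 * in the current buffer list via tc_bind_buffer (texture views go through
 * tc_set_resource_batch_usage instead). This bookkeeping is what later lets
 * the threaded context decide, without syncing, whether a buffer may still
 * be referenced by queued calls or is bound for write (see tc_is_buffer_busy
 * and tc_is_buffer_bound_for_write).
 */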
1883 static void
1884 tc_set_sampler_views(struct pipe_context *_pipe,
1885 enum pipe_shader_type shader,
1886 unsigned start, unsigned count,
1887 unsigned unbind_num_trailing_slots, bool take_ownership,
1888 struct pipe_sampler_view **views)
1889 {
1890 if (!count && !unbind_num_trailing_slots)
1891 return;
1892
1893 struct threaded_context *tc = threaded_context(_pipe);
1894 struct tc_sampler_views *p =
1895 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1896 views ? count : 0);
1897
1898 p->shader = shader;
1899 p->start = start;
1900
1901 if (views) {
1902 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1903
1904 p->count = count;
1905 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1906
1907 if (take_ownership) {
1908 memcpy(p->slot, views, sizeof(*views) * count);
1909
1910 for (unsigned i = 0; i < count; i++) {
1911 if (views[i]) {
1912 if (views[i]->target == PIPE_BUFFER)
1913 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1914 views[i]->texture);
1915 else
1916 tc_set_resource_batch_usage(tc, views[i]->texture);
1917 } else {
1918 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1919 }
1920 }
1921 } else {
1922 for (unsigned i = 0; i < count; i++) {
1923 p->slot[i] = NULL;
1924 pipe_sampler_view_reference(&p->slot[i], views[i]);
1925
1926 if (views[i]) {
1927 if (views[i]->target == PIPE_BUFFER)
1928 tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1929 views[i]->texture);
1930 else
1931 tc_set_resource_batch_usage(tc, views[i]->texture);
1932 } else {
1933 tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1934 }
1935 }
1936 }
1937
1938 tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1939 unbind_num_trailing_slots);
1940 tc->seen_sampler_buffers[shader] = true;
1941 } else {
1942 p->count = 0;
1943 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1944
1945 tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1946 count + unbind_num_trailing_slots);
1947 }
1948 }
1949
1950 struct tc_shader_images {
1951 struct tc_call_base base;
1952 uint8_t shader, start, count;
1953 uint8_t unbind_num_trailing_slots;
1954 struct pipe_image_view slot[0]; /* more will be allocated if needed */
1955 };
1956
1957 static uint16_t
1958 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1959 {
1960 struct tc_shader_images *p = (struct tc_shader_images *)call;
1961 unsigned count = p->count;
1962
1963 if (!p->count) {
1964 pipe->set_shader_images(pipe, p->shader, p->start, 0,
1965 p->unbind_num_trailing_slots, NULL);
1966 return call_size(tc_shader_images);
1967 }
1968
1969 pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1970 p->unbind_num_trailing_slots, p->slot);
1971
1972 for (unsigned i = 0; i < count; i++)
1973 tc_drop_resource_reference(p->slot[i].resource);
1974
1975 return p->base.num_slots;
1976 }
1977
1978 static void
1979 tc_set_shader_images(struct pipe_context *_pipe,
1980 enum pipe_shader_type shader,
1981 unsigned start, unsigned count,
1982 unsigned unbind_num_trailing_slots,
1983 const struct pipe_image_view *images)
1984 {
1985 if (!count && !unbind_num_trailing_slots)
1986 return;
1987
1988 struct threaded_context *tc = threaded_context(_pipe);
1989 struct tc_shader_images *p =
1990 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1991 images ? count : 0);
1992 unsigned writable_buffers = 0;
1993
1994 p->shader = shader;
1995 p->start = start;
1996
1997 if (images) {
1998 p->count = count;
1999 p->unbind_num_trailing_slots = unbind_num_trailing_slots;
2000
2001 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2002
2003 for (unsigned i = 0; i < count; i++) {
2004 struct pipe_resource *resource = images[i].resource;
2005
2006 tc_set_resource_reference(&p->slot[i].resource, resource);
2007
2008 if (resource) {
2009 if (resource->target == PIPE_BUFFER) {
2010 tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
2011
2012 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
2013 struct threaded_resource *tres = threaded_resource(resource);
2014
2015 tc_buffer_disable_cpu_storage(resource);
2016 util_range_add(&tres->b, &tres->valid_buffer_range,
2017 images[i].u.buf.offset,
2018 images[i].u.buf.offset + images[i].u.buf.size);
2019 writable_buffers |= BITFIELD_BIT(start + i);
2020 }
2021 } else {
2022 tc_set_resource_batch_usage(tc, resource);
2023 }
2024 } else {
2025 tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
2026 }
2027 }
2028 memcpy(p->slot, images, count * sizeof(images[0]));
2029
2030 tc_unbind_buffers(&tc->image_buffers[shader][start + count],
2031 unbind_num_trailing_slots);
2032 tc->seen_image_buffers[shader] = true;
2033 } else {
2034 p->count = 0;
2035 p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
2036
2037 tc_unbind_buffers(&tc->image_buffers[shader][start],
2038 count + unbind_num_trailing_slots);
2039 }
2040
2041 tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2042 tc->image_buffers_writeable_mask[shader] |= writable_buffers;
2043 }
2044
2045 struct tc_shader_buffers {
2046 struct tc_call_base base;
2047 uint8_t shader, start, count;
2048 bool unbind;
2049 unsigned writable_bitmask;
2050 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
2051 };
2052
2053 static uint16_t
2054 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
2055 {
2056 struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
2057 unsigned count = p->count;
2058
2059 if (p->unbind) {
2060 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
2061 return call_size(tc_shader_buffers);
2062 }
2063
2064 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
2065 p->writable_bitmask);
2066
2067 for (unsigned i = 0; i < count; i++)
2068 tc_drop_resource_reference(p->slot[i].buffer);
2069
2070 return p->base.num_slots;
2071 }
2072
2073 static void
2074 tc_set_shader_buffers(struct pipe_context *_pipe,
2075 enum pipe_shader_type shader,
2076 unsigned start, unsigned count,
2077 const struct pipe_shader_buffer *buffers,
2078 unsigned writable_bitmask)
2079 {
2080 if (!count)
2081 return;
2082
2083 struct threaded_context *tc = threaded_context(_pipe);
2084 struct tc_shader_buffers *p =
2085 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
2086 buffers ? count : 0);
2087
2088 p->shader = shader;
2089 p->start = start;
2090 p->count = count;
2091 p->unbind = buffers == NULL;
2092 p->writable_bitmask = writable_bitmask;
2093
2094 if (buffers) {
2095 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2096
2097 for (unsigned i = 0; i < count; i++) {
2098 struct pipe_shader_buffer *dst = &p->slot[i];
2099 const struct pipe_shader_buffer *src = buffers + i;
2100
2101 tc_set_resource_reference(&dst->buffer, src->buffer);
2102 dst->buffer_offset = src->buffer_offset;
2103 dst->buffer_size = src->buffer_size;
2104
2105 if (src->buffer) {
2106 struct threaded_resource *tres = threaded_resource(src->buffer);
2107
2108 tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2109
2110 if (writable_bitmask & BITFIELD_BIT(i)) {
2111 tc_buffer_disable_cpu_storage(src->buffer);
2112 util_range_add(&tres->b, &tres->valid_buffer_range,
2113 src->buffer_offset,
2114 src->buffer_offset + src->buffer_size);
2115 }
2116 } else {
2117 tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2118 }
2119 }
2120 tc->seen_shader_buffers[shader] = true;
2121 } else {
2122 tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2123 }
2124
2125 tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2126 tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2127 }
2128
2129 static uint16_t
2130 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2131 {
2132 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2133 unsigned count = p->count;
2134
2135 for (unsigned i = 0; i < count; i++)
2136 tc_assert(!p->slot[i].is_user_buffer);
2137
2138 pipe->set_vertex_buffers(pipe, count, p->slot);
2139 return p->base.num_slots;
2140 }
2141
2142 static void
2143 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2144 const struct pipe_vertex_buffer *buffers)
2145 {
2146 struct threaded_context *tc = threaded_context(_pipe);
2147
2148 assert(!count || buffers);
2149
2150 if (count) {
2151 struct tc_vertex_buffers *p =
2152 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2153 p->count = count;
2154
2155 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2156
2157 memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2158
2159 for (unsigned i = 0; i < count; i++) {
2160 struct pipe_resource *buf = buffers[i].buffer.resource;
2161
2162 if (buf) {
2163 tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2164 } else {
2165 tc_unbind_buffer(&tc->vertex_buffers[i]);
2166 }
2167 }
2168 } else {
2169 struct tc_vertex_buffers *p =
2170 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2171 p->count = 0;
2172 }
2173
2174 /* We don't need to unbind trailing buffers because we never touch bindings
2175 * after num_vertex_buffers.
2176 */
2177 tc->num_vertex_buffers = count;
2178 }
2179
2180 struct pipe_vertex_buffer *
2181 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2182 {
2183 struct threaded_context *tc = threaded_context(_pipe);
2184
2185 /* We don't need to unbind trailing buffers because we never touch bindings
2186 * after num_vertex_buffers.
2187 */
2188 tc->num_vertex_buffers = count;
2189
2190 struct tc_vertex_buffers *p =
2191 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2192 p->count = count;
2193 return p->slot;
2194 }
2195
2196 struct tc_stream_outputs {
2197 struct tc_call_base base;
2198 unsigned count;
2199 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2200 unsigned offsets[PIPE_MAX_SO_BUFFERS];
2201 };
2202
2203 static uint16_t
2204 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2205 {
2206 struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2207 unsigned count = p->count;
2208
2209 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
2210 for (unsigned i = 0; i < count; i++)
2211 tc_drop_so_target_reference(p->targets[i]);
2212
2213 return call_size(tc_stream_outputs);
2214 }
2215
2216 static void
2217 tc_set_stream_output_targets(struct pipe_context *_pipe,
2218 unsigned count,
2219 struct pipe_stream_output_target **tgs,
2220 const unsigned *offsets)
2221 {
2222 struct threaded_context *tc = threaded_context(_pipe);
2223 struct tc_stream_outputs *p =
2224 tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2225 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2226
2227 for (unsigned i = 0; i < count; i++) {
2228 p->targets[i] = NULL;
2229 pipe_so_target_reference(&p->targets[i], tgs[i]);
2230 if (tgs[i]) {
2231 tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2232 tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2233 } else {
2234 tc_unbind_buffer(&tc->streamout_buffers[i]);
2235 }
2236 }
2237 p->count = count;
2238 memcpy(p->offsets, offsets, count * sizeof(unsigned));
2239
2240 tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2241 if (count)
2242 tc->seen_streamout_buffers = true;
2243 }
2244
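/* Rarely-used entry points: instead of copying the arrays into the queue,
 * these synchronize with the driver thread and forward the call directly.
 */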
2245 static void
2246 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2247 unsigned count, struct pipe_surface **resources)
2248 {
2249 struct threaded_context *tc = threaded_context(_pipe);
2250 struct pipe_context *pipe = tc->pipe;
2251
2252 tc_sync(tc);
2253 pipe->set_compute_resources(pipe, start, count, resources);
2254 }
2255
2256 static void
2257 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2258 unsigned count, struct pipe_resource **resources,
2259 uint32_t **handles)
2260 {
2261 struct threaded_context *tc = threaded_context(_pipe);
2262 struct pipe_context *pipe = tc->pipe;
2263
2264 tc_sync(tc);
2265 pipe->set_global_binding(pipe, first, count, resources, handles);
2266 }
2267
2268
2269 /********************************************************************
2270 * views
2271 */
2272
2273 static struct pipe_surface *
2274 tc_create_surface(struct pipe_context *_pipe,
2275 struct pipe_resource *resource,
2276 const struct pipe_surface *surf_tmpl)
2277 {
2278 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2279 struct pipe_surface *view =
2280 pipe->create_surface(pipe, resource, surf_tmpl);
2281
2282 if (view)
2283 view->context = _pipe;
2284 return view;
2285 }
2286
2287 static void
2288 tc_surface_destroy(struct pipe_context *_pipe,
2289 struct pipe_surface *surf)
2290 {
2291 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2292
2293 pipe->surface_destroy(pipe, surf);
2294 }
2295
2296 static struct pipe_sampler_view *
2297 tc_create_sampler_view(struct pipe_context *_pipe,
2298 struct pipe_resource *resource,
2299 const struct pipe_sampler_view *templ)
2300 {
2301 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2302 struct pipe_sampler_view *view =
2303 pipe->create_sampler_view(pipe, resource, templ);
2304
2305 if (view)
2306 view->context = _pipe;
2307 return view;
2308 }
2309
2310 static void
2311 tc_sampler_view_destroy(struct pipe_context *_pipe,
2312 struct pipe_sampler_view *view)
2313 {
2314 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2315
2316 pipe->sampler_view_destroy(pipe, view);
2317 }
2318
2319 static struct pipe_stream_output_target *
2320 tc_create_stream_output_target(struct pipe_context *_pipe,
2321 struct pipe_resource *res,
2322 unsigned buffer_offset,
2323 unsigned buffer_size)
2324 {
2325 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2326 struct threaded_resource *tres = threaded_resource(res);
2327 struct pipe_stream_output_target *view;
2328
2329 util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2330 buffer_offset + buffer_size);
2331
2332 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2333 buffer_size);
2334 if (view)
2335 view->context = _pipe;
2336 return view;
2337 }
2338
2339 static void
2340 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2341 struct pipe_stream_output_target *target)
2342 {
2343 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2344
2345 pipe->stream_output_target_destroy(pipe, target);
2346 }
2347
2348
2349 /********************************************************************
2350 * bindless
2351 */
2352
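/* Bindless handles: creating a handle returns a value to the caller, so it
 * has to synchronize with the driver thread, while making a handle resident
 * returns nothing and can be queued like any other call.
 */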
2353 static uint64_t
2354 tc_create_texture_handle(struct pipe_context *_pipe,
2355 struct pipe_sampler_view *view,
2356 const struct pipe_sampler_state *state)
2357 {
2358 struct threaded_context *tc = threaded_context(_pipe);
2359 struct pipe_context *pipe = tc->pipe;
2360
2361 tc_sync(tc);
2362 return pipe->create_texture_handle(pipe, view, state);
2363 }
2364
2365 struct tc_make_texture_handle_resident {
2366 struct tc_call_base base;
2367 bool resident;
2368 uint64_t handle;
2369 };
2370
2371 static uint16_t
2372 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2373 {
2374 struct tc_make_texture_handle_resident *p =
2375 to_call(call, tc_make_texture_handle_resident);
2376
2377 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2378 return call_size(tc_make_texture_handle_resident);
2379 }
2380
2381 static void
2382 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2383 bool resident)
2384 {
2385 struct threaded_context *tc = threaded_context(_pipe);
2386 struct tc_make_texture_handle_resident *p =
2387 tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2388 tc_make_texture_handle_resident);
2389
2390 p->handle = handle;
2391 p->resident = resident;
2392 }
2393
2394 static uint64_t
2395 tc_create_image_handle(struct pipe_context *_pipe,
2396 const struct pipe_image_view *image)
2397 {
2398 struct threaded_context *tc = threaded_context(_pipe);
2399 struct pipe_context *pipe = tc->pipe;
2400
2401 if (image->resource->target == PIPE_BUFFER)
2402 tc_buffer_disable_cpu_storage(image->resource);
2403
2404 tc_sync(tc);
2405 return pipe->create_image_handle(pipe, image);
2406 }
2407
2408 struct tc_make_image_handle_resident {
2409 struct tc_call_base base;
2410 bool resident;
2411 unsigned access;
2412 uint64_t handle;
2413 };
2414
2415 static uint16_t
2416 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2417 {
2418 struct tc_make_image_handle_resident *p =
2419 to_call(call, tc_make_image_handle_resident);
2420
2421 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2422 return call_size(tc_make_image_handle_resident);
2423 }
2424
2425 static void
2426 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2427 unsigned access, bool resident)
2428 {
2429 struct threaded_context *tc = threaded_context(_pipe);
2430 struct tc_make_image_handle_resident *p =
2431 tc_add_call(tc, TC_CALL_make_image_handle_resident,
2432 tc_make_image_handle_resident);
2433
2434 p->handle = handle;
2435 p->access = access;
2436 p->resident = resident;
2437 }
2438
2439
2440 /********************************************************************
2441 * transfer
2442 */
2443
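/* Buffer invalidation works by allocating fresh storage on the application
 * thread and queuing a driver-provided replace_buffer_storage callback that
 * swaps the storage of the original pipe_resource when the batch executes.
 * The threaded side immediately rebinds its own tracking to the new buffer
 * id, so subsequent queued calls behave as if the buffer had been
 * reallocated.
 */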
2444 struct tc_replace_buffer_storage {
2445 struct tc_call_base base;
2446 uint16_t num_rebinds;
2447 uint32_t rebind_mask;
2448 uint32_t delete_buffer_id;
2449 struct pipe_resource *dst;
2450 struct pipe_resource *src;
2451 tc_replace_buffer_storage_func func;
2452 };
2453
2454 static uint16_t
2455 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2456 {
2457 struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2458
2459 p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2460
2461 tc_drop_resource_reference(p->dst);
2462 tc_drop_resource_reference(p->src);
2463 return call_size(tc_replace_buffer_storage);
2464 }
2465
2466 /* Return true if the buffer has been invalidated or is idle. */
2467 static bool
2468 tc_invalidate_buffer(struct threaded_context *tc,
2469 struct threaded_resource *tbuf)
2470 {
2471 if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2472 /* It's idle, so an actual invalidation would be a no-op and is skipped,
2473 * but we can still clear the valid range, which is what an invalidation
2474 * implies.
2475 *
2476 * If the buffer is bound for write, we can't invalidate the range.
2477 */
2478 if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2479 util_range_set_empty(&tbuf->valid_buffer_range);
2480 return true;
2481 }
2482
2483 struct pipe_screen *screen = tc->base.screen;
2484 struct pipe_resource *new_buf;
2485
2486 /* Shared, pinned, and sparse buffers can't be reallocated. */
2487 if (tbuf->is_shared ||
2488 tbuf->is_user_ptr ||
2489 tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2490 return false;
2491
2492 /* Allocate a new one. */
2493 new_buf = screen->resource_create(screen, &tbuf->b);
2494 if (!new_buf)
2495 return false;
2496
2497 /* Replace the "latest" pointer. */
2498 if (tbuf->latest != &tbuf->b)
2499 pipe_resource_reference(&tbuf->latest, NULL);
2500
2501 tbuf->latest = new_buf;
2502
2503 uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2504
2505 /* Enqueue storage replacement of the original buffer. */
2506 struct tc_replace_buffer_storage *p =
2507 tc_add_call(tc, TC_CALL_replace_buffer_storage,
2508 tc_replace_buffer_storage);
2509
2510 p->func = tc->replace_buffer_storage;
2511 tc_set_resource_reference(&p->dst, &tbuf->b);
2512 tc_set_resource_reference(&p->src, new_buf);
2513 p->delete_buffer_id = delete_buffer_id;
2514 p->rebind_mask = 0;
2515
2516 /* Treat the current buffer as the new buffer. */
2517 bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2518 p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2519 threaded_resource(new_buf)->buffer_id_unique,
2520 &p->rebind_mask);
2521
2522 /* If the buffer is not bound for write, clear the valid range. */
2523 if (!bound_for_write)
2524 util_range_set_empty(&tbuf->valid_buffer_range);
2525
2526 tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2527 threaded_resource(new_buf)->buffer_id_unique = 0;
2528
2529 return true;
2530 }
2531
2532 static unsigned
2533 tc_improve_map_buffer_flags(struct threaded_context *tc,
2534 struct threaded_resource *tres, unsigned usage,
2535 unsigned offset, unsigned size)
2536 {
2537 /* Never invalidate inside the driver and never infer "unsynchronized". */
2538 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2539 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2540
2541 /* Prevent a reentry. */
2542 if (usage & tc_flags)
2543 return usage;
2544
2545 /* Use the staging upload if it's preferred. */
2546 if (usage & (PIPE_MAP_DISCARD_RANGE |
2547 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2548 !(usage & PIPE_MAP_PERSISTENT) &&
2549 tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2550 tc->use_forced_staging_uploads) {
2551 usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2552 PIPE_MAP_UNSYNCHRONIZED);
2553
2554 return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2555 }
2556
2557 /* Sparse buffers can't be mapped directly and can't be reallocated
2558 * (fully invalidated). That may just be a radeonsi limitation, but
2559 * the threaded context must obey it with radeonsi.
2560 */
2561 if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2562 /* We can use DISCARD_RANGE instead of full discard. This is the only
2563 * fast path for sparse buffers that doesn't need thread synchronization.
2564 */
2565 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2566 usage |= PIPE_MAP_DISCARD_RANGE;
2567
2568 /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2569 * The threaded context doesn't do unsynchronized mappings and
2570 * invalidations of sparse buffers, so correct driver behavior won't
2571 * result in incorrect behavior with the threaded context.
2572 */
2573 return usage;
2574 }
2575
2576 usage |= tc_flags;
2577
2578 /* Handle CPU reads trivially. */
2579 if (usage & PIPE_MAP_READ) {
2580 if (usage & PIPE_MAP_UNSYNCHRONIZED)
2581 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2582
2583 /* Drivers aren't allowed to do buffer invalidations. */
2584 return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2585 }
2586
2587 /* See if the buffer range being mapped has never been initialized or
2588 * the buffer is idle, in which case it can be mapped unsynchronized. */
2589 if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2590 ((!tres->is_shared &&
2591 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2592 !tc_is_buffer_busy(tc, tres, usage)))
2593 usage |= PIPE_MAP_UNSYNCHRONIZED;
2594
2595 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2596 /* If discarding the entire range, discard the whole resource instead. */
2597 if (usage & PIPE_MAP_DISCARD_RANGE &&
2598 offset == 0 && size == tres->b.width0)
2599 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2600
2601 /* Discard the whole resource if needed. */
2602 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2603 if (tc_invalidate_buffer(tc, tres))
2604 usage |= PIPE_MAP_UNSYNCHRONIZED;
2605 else
2606 usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2607 }
2608 }
2609
2610 /* We won't need this flag anymore. */
2611 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2612 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2613
2614 /* GL_AMD_pinned_memory and persistent mappings can't use staging
2615 * buffers. */
2616 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2617 PIPE_MAP_PERSISTENT) ||
2618 tres->is_user_ptr)
2619 usage &= ~PIPE_MAP_DISCARD_RANGE;
2620
2621 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2622 if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2623 usage &= ~PIPE_MAP_DISCARD_RANGE;
2624 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2625 }
2626
2627 return usage;
2628 }
2629
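/* tc_buffer_map picks one of three paths: hand out the buffer's CPU storage
 * (when allow_cpu_storage is set), allocate a staging slice from
 * stream_uploader for DISCARD_RANGE maps (copied back later by
 * tc_buffer_do_flush_region), or map the real buffer in the driver,
 * synchronizing first unless the map ended up UNSYNCHRONIZED.
 */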
2630 static void *
2631 tc_buffer_map(struct pipe_context *_pipe,
2632 struct pipe_resource *resource, unsigned level,
2633 unsigned usage, const struct pipe_box *box,
2634 struct pipe_transfer **transfer)
2635 {
2636 struct threaded_context *tc = threaded_context(_pipe);
2637 struct threaded_resource *tres = threaded_resource(resource);
2638 struct pipe_context *pipe = tc->pipe;
2639
2640 /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage;
2641 * this shouldn't normally be necessary because glthread only uses large buffers.
2642 */
2643 if (usage & PIPE_MAP_THREAD_SAFE)
2644 tc_buffer_disable_cpu_storage(resource);
2645
2646 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2647
2648 /* If the CPU storage is enabled, return it directly. */
2649 if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2650 /* We can't let resource_copy_region disable the CPU storage. */
2651 assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2652
2653 if (!tres->cpu_storage) {
2654 tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2655
2656 if (tres->cpu_storage && tres->valid_buffer_range.end) {
2657 /* The GPU buffer contains valid data. Copy them to the CPU storage. */
2658 struct pipe_box box2;
2659 struct pipe_transfer *transfer2;
2660
2661 unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2662 u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2663
2664 tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2665 tc_set_driver_thread(tc);
2666
2667 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2668 0, PIPE_MAP_READ, &box2, &transfer2);
2669 memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2670 ret,
2671 valid_range_len);
2672 pipe->buffer_unmap(pipe, transfer2);
2673
2674 tc_clear_driver_thread(tc);
2675 }
2676 }
2677
2678 if (tres->cpu_storage) {
2679 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2680 ttrans->b.resource = resource;
2681 ttrans->b.usage = usage;
2682 ttrans->b.box = *box;
2683 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2684 ttrans->cpu_storage_mapped = true;
2685 *transfer = &ttrans->b;
2686
2687 return (uint8_t*)tres->cpu_storage + box->x;
2688 } else {
2689 tres->allow_cpu_storage = false;
2690 }
2691 }
2692
2693 /* Do a staging transfer within the threaded context. The driver should
2694 * only get resource_copy_region.
2695 */
2696 if (usage & PIPE_MAP_DISCARD_RANGE) {
2697 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2698 uint8_t *map;
2699
2700 u_upload_alloc(tc->base.stream_uploader, 0,
2701 box->width + (box->x % tc->map_buffer_alignment),
2702 tc->map_buffer_alignment, &ttrans->b.offset,
2703 &ttrans->staging, (void**)&map);
2704 if (!map) {
2705 slab_free(&tc->pool_transfers, ttrans);
2706 return NULL;
2707 }
2708
2709 ttrans->b.resource = resource;
2710 ttrans->b.level = 0;
2711 ttrans->b.usage = usage;
2712 ttrans->b.box = *box;
2713 ttrans->b.stride = 0;
2714 ttrans->b.layer_stride = 0;
2715 ttrans->valid_buffer_range = &tres->valid_buffer_range;
2716 ttrans->cpu_storage_mapped = false;
2717 *transfer = &ttrans->b;
2718
2719 p_atomic_inc(&tres->pending_staging_uploads);
2720 util_range_add(resource, &tres->pending_staging_uploads_range,
2721 box->x, box->x + box->width);
2722
2723 return map + (box->x % tc->map_buffer_alignment);
2724 }
2725
2726 if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2727 p_atomic_read(&tres->pending_staging_uploads) &&
2728 util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2729 /* Write conflict detected between a staging transfer and the direct mapping we're
2730 * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2731 * will have to wait for the staging transfer completion.
2732 * Note: The conflict detection is only based on the mapped range, not on the actual
2733 * written range(s).
2734 */
2735 usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2736 tc->use_forced_staging_uploads = false;
2737 }
2738
2739 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2740 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2741 tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
2742 usage & PIPE_MAP_READ ? " read" : " staging conflict");
2743 tc_set_driver_thread(tc);
2744 }
2745
2746 tc->bytes_mapped_estimate += box->width;
2747
2748 void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2749 level, usage, box, transfer);
2750 threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2751 threaded_transfer(*transfer)->cpu_storage_mapped = false;
2752
2753 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2754 tc_clear_driver_thread(tc);
2755
2756 return ret;
2757 }
2758
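/* Texture maps always synchronize with the driver thread; only buffer maps
 * have unsynchronized fast paths. The persistent batch-usage flag set here
 * blocks unsynchronized texture subdata for the resource until it is
 * unmapped again.
 */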
2759 static void *
2760 tc_texture_map(struct pipe_context *_pipe,
2761 struct pipe_resource *resource, unsigned level,
2762 unsigned usage, const struct pipe_box *box,
2763 struct pipe_transfer **transfer)
2764 {
2765 struct threaded_context *tc = threaded_context(_pipe);
2766 struct threaded_resource *tres = threaded_resource(resource);
2767 struct pipe_context *pipe = tc->pipe;
2768
2769 tc_sync_msg(tc, "texture");
2770 tc_set_driver_thread(tc);
2771 /* block all unsync texture subdata during map */
2772 tc_set_resource_batch_usage_persistent(tc, resource, true);
2773
2774 tc->bytes_mapped_estimate += box->width;
2775
2776 void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2777 level, usage, box, transfer);
2778
2779 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2780 tc_clear_driver_thread(tc);
2781
2782 return ret;
2783 }
2784
2785 struct tc_transfer_flush_region {
2786 struct tc_call_base base;
2787 struct pipe_box box;
2788 struct pipe_transfer *transfer;
2789 };
2790
2791 static uint16_t
2792 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2793 {
2794 struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2795
2796 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2797 return call_size(tc_transfer_flush_region);
2798 }
2799
2800 struct tc_resource_copy_region {
2801 struct tc_call_base base;
2802 unsigned dst_level;
2803 unsigned dstx, dsty, dstz;
2804 unsigned src_level;
2805 struct pipe_box src_box;
2806 struct pipe_resource *dst;
2807 struct pipe_resource *src;
2808 };
2809
2810 static void
2811 tc_resource_copy_region(struct pipe_context *_pipe,
2812 struct pipe_resource *dst, unsigned dst_level,
2813 unsigned dstx, unsigned dsty, unsigned dstz,
2814 struct pipe_resource *src, unsigned src_level,
2815 const struct pipe_box *src_box);
2816
2817 static void
2818 tc_buffer_do_flush_region(struct threaded_context *tc,
2819 struct threaded_transfer *ttrans,
2820 const struct pipe_box *box)
2821 {
2822 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2823
2824 if (ttrans->staging) {
2825 struct pipe_box src_box;
2826
2827 u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2828 (box->x - ttrans->b.box.x),
2829 box->width, &src_box);
2830
2831 /* Copy the staging buffer into the original one. */
2832 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2833 ttrans->staging, 0, &src_box);
2834 }
2835
2836 /* Don't update the valid range when we're uploading the CPU storage
2837 * because it includes the uninitialized range too.
2838 */
2839 if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2840 util_range_add(&tres->b, ttrans->valid_buffer_range,
2841 box->x, box->x + box->width);
2842 }
2843 }
2844
2845 static void
2846 tc_transfer_flush_region(struct pipe_context *_pipe,
2847 struct pipe_transfer *transfer,
2848 const struct pipe_box *rel_box)
2849 {
2850 struct threaded_context *tc = threaded_context(_pipe);
2851 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2852 struct threaded_resource *tres = threaded_resource(transfer->resource);
2853 unsigned required_usage = PIPE_MAP_WRITE |
2854 PIPE_MAP_FLUSH_EXPLICIT;
2855
2856 if (tres->b.target == PIPE_BUFFER) {
2857 if ((transfer->usage & required_usage) == required_usage) {
2858 struct pipe_box box;
2859
2860 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2861 tc_buffer_do_flush_region(tc, ttrans, &box);
2862 }
2863
2864 /* Staging transfers don't send the call to the driver.
2865 *
2866 * Transfers using the CPU storage shouldn't call transfer_flush_region
2867 * in the driver because the buffer is not really mapped on the driver
2868 * side and the CPU storage always re-uploads everything (flush_region
2869 * makes no difference).
2870 */
2871 if (ttrans->staging || ttrans->cpu_storage_mapped)
2872 return;
2873 }
2874
2875 struct tc_transfer_flush_region *p =
2876 tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2877 p->transfer = transfer;
2878 p->box = *rel_box;
2879 }
2880
2881 static void
2882 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2883 unsigned flags);
2884
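/* Unmapping is deferred to batch execution. For staging transfers the
 * threaded_transfer is freed immediately and only a resource reference is
 * queued, so the pending_staging_uploads counter can be dropped on the
 * driver thread; otherwise the pipe_transfer itself is queued and unmapped
 * there.
 */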
2885 struct tc_buffer_unmap {
2886 struct tc_call_base base;
2887 bool was_staging_transfer;
2888 union {
2889 struct pipe_transfer *transfer;
2890 struct pipe_resource *resource;
2891 };
2892 };
2893
2894 static uint16_t
2895 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2896 {
2897 struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2898
2899 if (p->was_staging_transfer) {
2900 struct threaded_resource *tres = threaded_resource(p->resource);
2901 /* Nothing to do except keeping track of staging uploads */
2902 assert(tres->pending_staging_uploads > 0);
2903 p_atomic_dec(&tres->pending_staging_uploads);
2904 tc_drop_resource_reference(p->resource);
2905 } else {
2906 pipe->buffer_unmap(pipe, p->transfer);
2907 }
2908
2909 return call_size(tc_buffer_unmap);
2910 }
2911
2912 static void
2913 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2914 {
2915 struct threaded_context *tc = threaded_context(_pipe);
2916 struct threaded_transfer *ttrans = threaded_transfer(transfer);
2917 struct threaded_resource *tres = threaded_resource(transfer->resource);
2918
2919 /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2920 * called from any thread and bypasses all multithreaded queues.
2921 */
2922 if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2923 assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2924 assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2925 PIPE_MAP_DISCARD_RANGE)));
2926
2927 struct pipe_context *pipe = tc->pipe;
2928 util_range_add(&tres->b, ttrans->valid_buffer_range,
2929 transfer->box.x, transfer->box.x + transfer->box.width);
2930
2931 pipe->buffer_unmap(pipe, transfer);
2932 return;
2933 }
2934
2935 if (transfer->usage & PIPE_MAP_WRITE &&
2936 !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2937 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2938
2939 if (ttrans->cpu_storage_mapped) {
2940 /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2941 * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2942 * If that happens, we just ignore the unmap call and don't upload anything to prevent
2943 * a crash.
2944 *
2945 * Disallow the CPU storage in the driver to work around this.
2946 */
2947 assert(tres->cpu_storage);
2948
2949 if (tres->cpu_storage) {
2950 tc_invalidate_buffer(tc, tres);
2951 tc_buffer_subdata(&tc->base, &tres->b,
2952 PIPE_MAP_UNSYNCHRONIZED |
2953 TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2954 0, tres->b.width0, tres->cpu_storage);
2955 /* This shouldn't have been freed by buffer_subdata. */
2956 assert(tres->cpu_storage);
2957 } else {
2958 static bool warned_once = false;
2959 if (!warned_once) {
2960 fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2961 fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2962 warned_once = true;
2963 }
2964 }
2965
2966 tc_drop_resource_reference(ttrans->staging);
2967 slab_free(&tc->pool_transfers, ttrans);
2968 return;
2969 }
2970
2971 bool was_staging_transfer = false;
2972
2973 if (ttrans->staging) {
2974 was_staging_transfer = true;
2975
2976 tc_drop_resource_reference(ttrans->staging);
2977 slab_free(&tc->pool_transfers, ttrans);
2978 }
2979
2980 struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2981 tc_buffer_unmap);
2982 if (was_staging_transfer) {
2983 tc_set_resource_reference(&p->resource, &tres->b);
2984 p->was_staging_transfer = true;
2985 } else {
2986 p->transfer = transfer;
2987 p->was_staging_transfer = false;
2988 }
2989
2990 /* tc_buffer_map maps buffers directly, but tc_buffer_unmap defers the
2991 * unmap operation to batch execution.
2992 * bytes_mapped_estimate is an estimate of the map/unmap bytes delta;
2993 * if it exceeds the optional limit, the current batch is flushed
2994 * to reclaim some RAM. */
2995 if (!ttrans->staging && tc->bytes_mapped_limit &&
2996 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2997 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2998 }
2999 }
3000
3001 struct tc_texture_unmap {
3002 struct tc_call_base base;
3003 struct pipe_transfer *transfer;
3004 };
3005
3006 static uint16_t
3007 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
3008 {
3009 struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
3010
3011 pipe->texture_unmap(pipe, p->transfer);
3012 return call_size(tc_texture_unmap);
3013 }
3014
3015 static void
3016 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
3017 {
3018 struct threaded_context *tc = threaded_context(_pipe);
3019 struct threaded_transfer *ttrans = threaded_transfer(transfer);
3020
3021 /* enable subdata again once resource is no longer mapped */
3022 tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
3023
3024 tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
3025
3026 /* tc_texture_map maps textures directly, but tc_texture_unmap defers the
3027 * unmap operation to batch execution.
3028 * bytes_mapped_estimate is an estimate of the map/unmap byte delta; if it
3029 * exceeds an optional limit, the current batch is flushed to reclaim some
3030 * RAM. */
3031 if (!ttrans->staging && tc->bytes_mapped_limit &&
3032 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3033 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3034 }
3035 }
3036
3037 struct tc_buffer_subdata {
3038 struct tc_call_base base;
3039 unsigned usage, offset, size;
3040 struct pipe_resource *resource;
3041 char slot[0]; /* more will be allocated if needed */
3042 };
3043
3044 static uint16_t
3045 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
3046 {
3047 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
3048
3049 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
3050 p->slot);
3051 tc_drop_resource_reference(p->resource);
3052 return p->base.num_slots;
3053 }
3054
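/* Returns true when a new buffer_subdata call can be appended to "previous_call":
 * same usage flags, same resource, and the new range starting exactly where the
 * previous one ended. Illustrative example: uploads covering bytes [0, 256)
 * and then [256, 512) of the same buffer are merged by tc_buffer_subdata into
 * a single 512-byte upload.
 */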
3055 static bool
3056 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
3057 unsigned usage, unsigned offset,
3058 struct pipe_resource *resource)
3059 {
3060 if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
3061 return false;
3062
3063 struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
3064
3065 return subdata->usage == usage && subdata->resource == resource
3066 && (subdata->offset + subdata->size) == offset;
3067 }
3068
3069 static void
3070 tc_buffer_subdata(struct pipe_context *_pipe,
3071 struct pipe_resource *resource,
3072 unsigned usage, unsigned offset,
3073 unsigned size, const void *data)
3074 {
3075 struct threaded_context *tc = threaded_context(_pipe);
3076 struct threaded_resource *tres = threaded_resource(resource);
3077
3078 if (!size)
3079 return;
3080
3081 usage |= PIPE_MAP_WRITE;
3082
3083 /* PIPE_MAP_DIRECTLY suppresses the implicit DISCARD_RANGE. */
3084 if (!(usage & PIPE_MAP_DIRECTLY))
3085 usage |= PIPE_MAP_DISCARD_RANGE;
3086
3087 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3088
3089 /* Unsynchronized and big transfers should use transfer_map. Also handle
3090 * full invalidations here, because drivers aren't allowed to do them.
3091 */
3092 if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3093 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3094 size > TC_MAX_SUBDATA_BYTES ||
3095 tres->cpu_storage) {
3096 struct pipe_transfer *transfer;
3097 struct pipe_box box;
3098 uint8_t *map = NULL;
3099
3100 u_box_1d(offset, size, &box);
3101
3102 /* CPU storage is only useful for partial updates. It can add overhead
3103 * on glBufferData calls so avoid using it.
3104 */
3105 if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3106 usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3107
3108 map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3109 if (map) {
3110 memcpy(map, data, size);
3111 tc_buffer_unmap(_pipe, transfer);
3112 }
3113 return;
3114 }
3115
3116 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3117
3118 /* We can potentially merge this subdata call with the previous one (if any),
3119 * if the application does a whole-buffer upload piecewise. */
3120 {
3121 struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3122 struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3123
3124 if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3125 tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3126 memcpy(merge_dest->slot + merge_dest->size, data, size);
3127 merge_dest->size += size;
3128
3129 /* TODO: We *could* do an invalidate + upload here if we detect that
3130 * the merged subdata call overwrites the entire buffer. However, that's
3131 * a little complicated since we can't add further calls to our batch
3132 * until we have removed the merged subdata call, which means that
3133 * calling tc_invalidate_buffer before we have removed the call will
3134 * blow things up.
3135 *
3136 * Just leave a large, merged subdata call in the batch for now, which is
3137 * at least better than tons of tiny subdata calls.
3138 */
3139
3140 return;
3141 }
3142 }
3143
3144 /* The upload is small. Enqueue it. */
3145 struct tc_buffer_subdata *p =
3146 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3147
3148 tc_set_resource_reference(&p->resource, resource);
3149 /* This will always be busy because if it weren't, tc_improve_map_buffer_flags
3150 * would have set UNSYNCHRONIZED and we wouldn't get here.
3151 */
3152 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3153 p->usage = usage;
3154 p->offset = offset;
3155 p->size = size;
3156 memcpy(p->slot, data, size);
3157
3158 tc_mark_call_mergeable(tc, &p->base);
3159 }
3160
3161 struct tc_texture_subdata {
3162 struct tc_call_base base;
3163 unsigned level, usage, stride;
3164 struct pipe_box box;
3165 struct pipe_resource *resource;
3166 uintptr_t layer_stride;
3167 char slot[0]; /* more will be allocated if needed */
3168 };
3169
3170 static uint16_t
3171 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3172 {
3173 struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3174
3175 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3176 p->slot, p->stride, p->layer_stride);
3177 tc_drop_resource_reference(p->resource);
3178 return p->base.num_slots;
3179 }
3180
3181 static void
3182 tc_texture_subdata(struct pipe_context *_pipe,
3183 struct pipe_resource *resource,
3184 unsigned level, unsigned usage,
3185 const struct pipe_box *box,
3186 const void *data, unsigned stride,
3187 uintptr_t layer_stride)
3188 {
3189 struct threaded_context *tc = threaded_context(_pipe);
3190 uint64_t size;
3191
3192 assert(box->height >= 1);
3193 assert(box->depth >= 1);
3194
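/* Compute how many bytes the box spans in the caller's layout: whole strides
 * for all but the last layer and row, plus the byte size of the last row.
 */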
3195 size = (box->depth - 1) * layer_stride +
3196 (box->height - 1) * (uint64_t)stride +
3197 box->width * util_format_get_blocksize(resource->format);
3198 if (!size)
3199 return;
3200
3201 /* Small uploads can be enqueued, big uploads must sync. */
3202 if (size <= TC_MAX_SUBDATA_BYTES) {
3203 struct tc_texture_subdata *p =
3204 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3205
3206 tc_set_resource_batch_usage(tc, resource);
3207 tc_set_resource_reference(&p->resource, resource);
3208 p->level = level;
3209 p->usage = usage;
3210 p->box = *box;
3211 p->stride = stride;
3212 p->layer_stride = layer_stride;
3213 memcpy(p->slot, data, size);
3214 } else {
3215 struct pipe_context *pipe = tc->pipe;
3216 struct threaded_resource *tres = threaded_resource(resource);
3217 unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3218 bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3219 tc->options.is_resource_busy &&
3220 !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3221
3222 if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3223 tc->options.parse_renderpass_info && tc->in_renderpass) {
3224 enum pipe_format format = resource->format;
3225 if (usage & PIPE_MAP_DEPTH_ONLY)
3226 format = util_format_get_depth_only(format);
3227 else if (usage & PIPE_MAP_STENCIL_ONLY)
3228 format = PIPE_FORMAT_S8_UINT;
3229
3230 unsigned fmt_stride = util_format_get_stride(format, box->width);
3231 uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3232 assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3233
3234 struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3235 pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3236 struct pipe_box src_box = *box;
3237 src_box.x = src_box.y = src_box.z = 0;
3238
3239 if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3240 /* if the strides match, a single copy is fine */
3241 tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3242 } else {
3243 /* if the strides don't match, open-code util_copy_box on the GPU and assume the driver will optimize */
3244 src_box.depth = 1;
3245 for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3246 unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3247 int blocksize = util_format_get_blocksize(format);
3248 int blockwidth = util_format_get_blockwidth(format);
3249 int blockheight = util_format_get_blockheight(format);
3250
3251 assert(blocksize > 0);
3252 assert(blockwidth > 0);
3253 assert(blockheight > 0);
3254
3255 dst_x /= blockwidth;
3256 dst_y /= blockheight;
3257 width = DIV_ROUND_UP(width, blockwidth);
3258 height = DIV_ROUND_UP(height, blockheight);
3259
3260 width *= blocksize;
3261
3262 if (width == fmt_stride && width == (unsigned)stride) {
3263 ASSERTED uint64_t size = (uint64_t)height * width;
3264
3265 assert(size <= SIZE_MAX);
3266 assert(dst_x + src_box.width < u_minify(pres->width0, level));
3267 assert(dst_y + src_box.height < u_minify(pres->height0, level));
3268 assert(pres->target != PIPE_TEXTURE_3D || z + src_box.depth < u_minify(pres->depth0, level));
3269 tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3270 } else {
3271 src_box.height = 1;
3272 for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3273 tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3274 }
3275 }
3276 }
3277
3278 pipe_resource_reference(&pres, NULL);
3279 } else {
3280 if (can_unsync) {
3281 usage |= unsync_usage;
3282 } else {
3283 tc_sync(tc);
3284 tc_set_driver_thread(tc);
3285 }
3286 pipe->texture_subdata(pipe, resource, level, usage, box, data,
3287 stride, layer_stride);
3288 if (!can_unsync)
3289 tc_clear_driver_thread(tc);
3290 }
3291 }
3292 }
3293
3294
3295 /********************************************************************
3296 * miscellaneous
3297 */
3298
3299 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3300 static ret_type \
3301 tc_##func(struct pipe_context *_pipe) \
3302 { \
3303 struct threaded_context *tc = threaded_context(_pipe); \
3304 struct pipe_context *pipe = tc->pipe; \
3305 tc_sync(tc); \
3306 return pipe->func(pipe); \
3307 }
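/* For example, TC_FUNC_SYNC_RET0(uint64_t, get_timestamp) below expands to a
 * tc_get_timestamp() wrapper that synchronizes with the driver thread and then
 * forwards to pipe->get_timestamp().
 */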
3308
3309 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
3310
3311 static void
3312 tc_get_sample_position(struct pipe_context *_pipe,
3313 unsigned sample_count, unsigned sample_index,
3314 float *out_value)
3315 {
3316 struct threaded_context *tc = threaded_context(_pipe);
3317 struct pipe_context *pipe = tc->pipe;
3318
3319 pipe->get_sample_position(pipe, sample_count, sample_index,
3320 out_value);
3321 }
3322
3323 static enum pipe_reset_status
3324 tc_get_device_reset_status(struct pipe_context *_pipe)
3325 {
3326 struct threaded_context *tc = threaded_context(_pipe);
3327 struct pipe_context *pipe = tc->pipe;
3328
3329 if (!tc->options.unsynchronized_get_device_reset_status)
3330 tc_sync(tc);
3331
3332 return pipe->get_device_reset_status(pipe);
3333 }
3334
3335 static void
3336 tc_set_device_reset_callback(struct pipe_context *_pipe,
3337 const struct pipe_device_reset_callback *cb)
3338 {
3339 struct threaded_context *tc = threaded_context(_pipe);
3340 struct pipe_context *pipe = tc->pipe;
3341
3342 tc_sync(tc);
3343 pipe->set_device_reset_callback(pipe, cb);
3344 }
3345
3346 struct tc_string_marker {
3347 struct tc_call_base base;
3348 int len;
3349 char slot[0]; /* more will be allocated if needed */
3350 };
3351
3352 static uint16_t
3353 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3354 {
3355 struct tc_string_marker *p = (struct tc_string_marker *)call;
3356 pipe->emit_string_marker(pipe, p->slot, p->len);
3357 return p->base.num_slots;
3358 }
3359
3360 static void
3361 tc_emit_string_marker(struct pipe_context *_pipe,
3362 const char *string, int len)
3363 {
3364 struct threaded_context *tc = threaded_context(_pipe);
3365
3366 if (len <= TC_MAX_STRING_MARKER_BYTES) {
3367 struct tc_string_marker *p =
3368 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3369
3370 memcpy(p->slot, string, len);
3371 p->len = len;
3372 } else {
3373 struct pipe_context *pipe = tc->pipe;
3374
3375 tc_sync(tc);
3376 tc_set_driver_thread(tc);
3377 pipe->emit_string_marker(pipe, string, len);
3378 tc_clear_driver_thread(tc);
3379 }
3380 }
3381
3382 static void
3383 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3384 unsigned flags)
3385 {
3386 struct threaded_context *tc = threaded_context(_pipe);
3387 struct pipe_context *pipe = tc->pipe;
3388
3389 tc_sync(tc);
3390 pipe->dump_debug_state(pipe, stream, flags);
3391 }
3392
3393 static void
3394 tc_set_debug_callback(struct pipe_context *_pipe,
3395 const struct util_debug_callback *cb)
3396 {
3397 struct threaded_context *tc = threaded_context(_pipe);
3398 struct pipe_context *pipe = tc->pipe;
3399
3400 tc_sync(tc);
3401
3402 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3403 * with this. shader-db will use an environment variable to disable
3404 * the threaded context.
3405 */
3406 if (cb && !cb->async)
3407 pipe->set_debug_callback(pipe, NULL);
3408 else
3409 pipe->set_debug_callback(pipe, cb);
3410 }
3411
3412 static void
3413 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3414 {
3415 struct threaded_context *tc = threaded_context(_pipe);
3416 struct pipe_context *pipe = tc->pipe;
3417
3418 tc_sync(tc);
3419 pipe->set_log_context(pipe, log);
3420 }
3421
3422 static void
3423 tc_create_fence_fd(struct pipe_context *_pipe,
3424 struct pipe_fence_handle **fence, int fd,
3425 enum pipe_fd_type type)
3426 {
3427 struct threaded_context *tc = threaded_context(_pipe);
3428 struct pipe_context *pipe = tc->pipe;
3429
3430 if (!tc->options.unsynchronized_create_fence_fd)
3431 tc_sync(tc);
3432
3433 pipe->create_fence_fd(pipe, fence, fd, type);
3434 }
3435
3436 struct tc_fence_call {
3437 struct tc_call_base base;
3438 struct pipe_fence_handle *fence;
3439 };
3440
3441 static uint16_t
3442 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3443 {
3444 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3445
3446 pipe->fence_server_sync(pipe, fence);
3447 pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3448 return call_size(tc_fence_call);
3449 }
3450
3451 static void
3452 tc_fence_server_sync(struct pipe_context *_pipe,
3453 struct pipe_fence_handle *fence)
3454 {
3455 struct threaded_context *tc = threaded_context(_pipe);
3456 struct pipe_screen *screen = tc->pipe->screen;
3457 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3458 tc_fence_call);
3459
3460 call->fence = NULL;
3461 screen->fence_reference(screen, &call->fence, fence);
3462 }
3463
3464 static void
3465 tc_fence_server_signal(struct pipe_context *_pipe,
3466 struct pipe_fence_handle *fence)
3467 {
3468 struct threaded_context *tc = threaded_context(_pipe);
3469 struct pipe_context *pipe = tc->pipe;
3470 tc_sync(tc);
3471 pipe->fence_server_signal(pipe, fence);
3472 }
3473
3474 static struct pipe_video_codec *
3475 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3476 UNUSED const struct pipe_video_codec *templ)
3477 {
3478 unreachable("Threaded context should not be enabled for video APIs");
3479 return NULL;
3480 }
3481
3482 static struct pipe_video_buffer *
3483 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3484 UNUSED const struct pipe_video_buffer *templ)
3485 {
3486 unreachable("Threaded context should not be enabled for video APIs");
3487 return NULL;
3488 }
3489
3490 struct tc_context_param {
3491 struct tc_call_base base;
3492 enum pipe_context_param param;
3493 unsigned value;
3494 };
3495
3496 static uint16_t
3497 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3498 {
3499 struct tc_context_param *p = to_call(call, tc_context_param);
3500
3501 if (pipe->set_context_param)
3502 pipe->set_context_param(pipe, p->param, p->value);
3503
3504 return call_size(tc_context_param);
3505 }
3506
3507 static void
3508 tc_set_context_param(struct pipe_context *_pipe,
3509 enum pipe_context_param param,
3510 unsigned value)
3511 {
3512 struct threaded_context *tc = threaded_context(_pipe);
3513
3514 if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3515 util_thread_sched_apply_policy(tc->queue.threads[0],
3516 UTIL_THREAD_THREADED_CONTEXT, value,
3517 NULL);
3518
3519 /* Execute this immediately (without enqueuing).
3520 * It's required to be thread-safe.
3521 */
3522 struct pipe_context *pipe = tc->pipe;
3523 if (pipe->set_context_param)
3524 pipe->set_context_param(pipe, param, value);
3525 return;
3526 }
3527
3528 if (tc->pipe->set_context_param) {
3529 struct tc_context_param *call =
3530 tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3531
3532 call->param = param;
3533 call->value = value;
3534 }
3535 }
3536
3537
3538 /********************************************************************
3539 * draw, launch, clear, blit, copy, flush
3540 */
3541
3542 struct tc_flush_deferred_call {
3543 struct tc_call_base base;
3544 unsigned flags;
3545 struct pipe_fence_handle *fence;
3546 };
3547
3548 struct tc_flush_call {
3549 struct tc_call_base base;
3550 unsigned flags;
3551 struct pipe_fence_handle *fence;
3552 struct threaded_context *tc;
3553 };
3554
3555 static void
3556 tc_flush_queries(struct threaded_context *tc)
3557 {
3558 struct threaded_query *tq, *tmp;
3559 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3560 list_del(&tq->head_unflushed);
3561
3562 /* Memory release semantics: due to a possible race with
3563 * tc_get_query_result, we must ensure that the linked list changes
3564 * are visible before setting tq->flushed.
3565 */
3566 p_atomic_set(&tq->flushed, true);
3567 }
3568 }
3569
3570 static uint16_t
3571 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3572 {
3573 struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3574 struct pipe_screen *screen = pipe->screen;
3575
3576 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3577 screen->fence_reference(screen, &p->fence, NULL);
3578
3579 return call_size(tc_flush_deferred_call);
3580 }
3581
3582 static uint16_t
3583 tc_call_flush(struct pipe_context *pipe, void *call)
3584 {
3585 struct tc_flush_call *p = to_call(call, tc_flush_call);
3586 struct pipe_screen *screen = pipe->screen;
3587
3588 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3589 screen->fence_reference(screen, &p->fence, NULL);
3590
3591 tc_flush_queries(p->tc);
3592
3593 return call_size(tc_flush_call);
3594 }
3595
3596 static void
3597 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3598 unsigned flags)
3599 {
3600 struct threaded_context *tc = threaded_context(_pipe);
3601 struct pipe_context *pipe = tc->pipe;
3602 struct pipe_screen *screen = pipe->screen;
3603 bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3604 bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
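/* If the driver provides create_fence, deferred and async flushes can be
 * enqueued: a fence token is created up front and the driver flush runs later
 * on the driver thread. Otherwise (or on allocation failure) we fall through
 * to the synchronous path below.
 */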
3605
3606 if (!deferred || !fence)
3607 tc->in_renderpass = false;
3608
3609 if (async && tc->options.create_fence) {
3610 if (fence) {
3611 struct tc_batch *next = &tc->batch_slots[tc->next];
3612
3613 if (!next->token) {
3614 next->token = malloc(sizeof(*next->token));
3615 if (!next->token)
3616 goto out_of_memory;
3617
3618 pipe_reference_init(&next->token->ref, 1);
3619 next->token->tc = tc;
3620 }
3621
3622 screen->fence_reference(screen, fence,
3623 tc->options.create_fence(pipe, next->token));
3624 if (!*fence)
3625 goto out_of_memory;
3626 }
3627
3628 struct tc_flush_call *p;
3629 if (deferred) {
3630 /* these have identical fields */
3631 p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3632 } else {
3633 p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3634 p->tc = tc;
3635 }
3636 p->fence = fence ? *fence : NULL;
3637 p->flags = flags | TC_FLUSH_ASYNC;
3638
3639 if (!deferred) {
3640 /* non-deferred async flushes indicate completion of existing renderpass info */
3641 tc_signal_renderpass_info_ready(tc);
3642 tc_batch_flush(tc, false);
3643 tc->seen_fb_state = false;
3644 }
3645
3646 return;
3647 }
3648
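/* Slow path: synchronize with the driver thread and flush directly. Also
 * taken when async fence creation isn't available or fails above.
 */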
3649 out_of_memory:
3650 tc->flushing = true;
3651 /* renderpass info is signaled during sync */
3652 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3653 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3654
3655 if (!deferred) {
3656 tc_flush_queries(tc);
3657 tc->seen_fb_state = false;
3658 tc->query_ended = false;
3659 }
3660 tc_set_driver_thread(tc);
3661 pipe->flush(pipe, fence, flags);
3662 tc_clear_driver_thread(tc);
3663 tc->flushing = false;
3664 }
3665
3666 struct tc_draw_single_drawid {
3667 struct tc_draw_single base;
3668 unsigned drawid_offset;
3669 };
3670
3671 static uint16_t
3672 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3673 {
3674 struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3675 struct tc_draw_single *info = &info_drawid->base;
3676
3677 /* u_threaded_context stores start/count in min/max_index for single draws. */
3678 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3679 struct pipe_draw_start_count_bias draw;
3680
3681 draw.start = info->info.min_index;
3682 draw.count = info->info.max_index;
3683 draw.index_bias = info->index_bias;
3684
3685 info->info.index_bounds_valid = false;
3686 info->info.has_user_indices = false;
3687 info->info.take_index_buffer_ownership = false;
3688
3689 pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3690 if (info->info.index_size)
3691 tc_drop_resource_reference(info->info.index.resource);
3692
3693 return call_size(tc_draw_single_drawid);
3694 }
3695
3696 static void
3697 simplify_draw_info(struct pipe_draw_info *info)
3698 {
3699 /* Clear these fields to facilitate draw merging.
3700 * Drivers shouldn't use them.
3701 */
3702 info->has_user_indices = false;
3703 info->index_bounds_valid = false;
3704 info->take_index_buffer_ownership = false;
3705 info->index_bias_varies = false;
3706 info->_pad = 0;
3707
3708 /* This shouldn't be set when merging single draws. */
3709 info->increment_draw_id = false;
3710
3711 if (info->index_size) {
3712 if (!info->primitive_restart)
3713 info->restart_index = 0;
3714 } else {
3715 assert(!info->primitive_restart);
3716 info->primitive_restart = false;
3717 info->restart_index = 0;
3718 info->index.resource = NULL;
3719 }
3720 }
3721
3722 static bool
3723 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3724 struct tc_draw_single *next)
3725 {
3726 if (next->base.call_id != TC_CALL_draw_single)
3727 return false;
3728
3729 STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3730 sizeof(struct pipe_draw_info) - 8);
3731 STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3732 sizeof(struct pipe_draw_info) - 4);
3733 /* All fields must be the same except start and count. */
3734 /* u_threaded_context stores start/count in min/max_index for single draws. */
3735 return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3736 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3737 }
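/* Note: the memcmp above covers all of pipe_draw_info except min/max_index
 * (which hold start/count for single draws), and index_bias lives outside
 * pipe_draw_info, so merged draws may differ in start, count and index_bias;
 * tc_call_draw_single reports the latter via index_bias_varies.
 */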
3738
3739 static uint16_t
3740 tc_call_draw_single(struct pipe_context *pipe, void *call)
3741 {
3742 /* Draw call merging. */
3743 struct tc_draw_single *first = to_call(call, tc_draw_single);
3744 struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3745
3746 /* If at least 2 consecutive draw calls can be merged... */
3747 if (next->base.call_id == TC_CALL_draw_single) {
3748 if (is_next_call_a_mergeable_draw(first, next)) {
3749 /* The maximum number of merged draws is given by the batch size. */
3750 struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3751 unsigned num_draws = 2;
3752 bool index_bias_varies = first->index_bias != next->index_bias;
3753
3754 /* u_threaded_context stores start/count in min/max_index for single draws. */
3755 multi[0].start = first->info.min_index;
3756 multi[0].count = first->info.max_index;
3757 multi[0].index_bias = first->index_bias;
3758 multi[1].start = next->info.min_index;
3759 multi[1].count = next->info.max_index;
3760 multi[1].index_bias = next->index_bias;
3761
3762 /* Find how many other draws can be merged. */
3763 next = get_next_call(next, tc_draw_single);
3764 for (; is_next_call_a_mergeable_draw(first, next);
3765 next = get_next_call(next, tc_draw_single), num_draws++) {
3766 /* u_threaded_context stores start/count in min/max_index for single draws. */
3767 multi[num_draws].start = next->info.min_index;
3768 multi[num_draws].count = next->info.max_index;
3769 multi[num_draws].index_bias = next->index_bias;
3770 index_bias_varies |= first->index_bias != next->index_bias;
3771 }
3772
3773 first->info.index_bias_varies = index_bias_varies;
3774 pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3775
3776 /* Since all draws use the same index buffer, drop all references at once. */
3777 if (first->info.index_size)
3778 pipe_drop_resource_references(first->info.index.resource, num_draws);
3779
3780 return call_size(tc_draw_single) * num_draws;
3781 }
3782 }
3783
3784 /* u_threaded_context stores start/count in min/max_index for single draws. */
3785 /* Drivers using u_threaded_context shouldn't use min/max_index. */
3786 struct pipe_draw_start_count_bias draw;
3787
3788 draw.start = first->info.min_index;
3789 draw.count = first->info.max_index;
3790 draw.index_bias = first->index_bias;
3791
3792 first->info.index_bounds_valid = false;
3793 first->info.has_user_indices = false;
3794 first->info.take_index_buffer_ownership = false;
3795
3796 pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3797 if (first->info.index_size)
3798 tc_drop_resource_reference(first->info.index.resource);
3799
3800 return call_size(tc_draw_single);
3801 }
3802
3803 struct tc_draw_indirect {
3804 struct tc_call_base base;
3805 struct pipe_draw_start_count_bias draw;
3806 struct pipe_draw_info info;
3807 struct pipe_draw_indirect_info indirect;
3808 };
3809
3810 static uint16_t
3811 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3812 {
3813 struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3814
3815 info->info.index_bounds_valid = false;
3816 info->info.take_index_buffer_ownership = false;
3817
3818 pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3819 if (info->info.index_size)
3820 tc_drop_resource_reference(info->info.index.resource);
3821
3822 tc_drop_resource_reference(info->indirect.buffer);
3823 tc_drop_resource_reference(info->indirect.indirect_draw_count);
3824 tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3825 return call_size(tc_draw_indirect);
3826 }
3827
3828 struct tc_draw_multi {
3829 struct tc_call_base base;
3830 unsigned num_draws;
3831 struct pipe_draw_info info;
3832 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3833 };
3834
3835 static uint16_t
3836 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3837 {
3838 struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3839
3840 info->info.has_user_indices = false;
3841 info->info.index_bounds_valid = false;
3842 info->info.take_index_buffer_ownership = false;
3843
3844 pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3845 if (info->info.index_size)
3846 tc_drop_resource_reference(info->info.index.resource);
3847
3848 return info->base.num_slots;
3849 }
3850
3851 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3852 offsetof(struct pipe_draw_info, index)
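/* This copies pipe_draw_info only up to (but not including) the index buffer
 * pointer; the user-index paths below point index.resource at their own
 * upload buffer instead of copying the caller's pointer.
 */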
3853
3854 /* Single draw with drawid_offset == 0. */
3855 static void
3856 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3857 unsigned drawid_offset,
3858 const struct pipe_draw_indirect_info *indirect,
3859 const struct pipe_draw_start_count_bias *draws,
3860 unsigned num_draws)
3861 {
3862 struct threaded_context *tc = threaded_context(_pipe);
3863 struct tc_draw_single *p =
3864 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3865
3866 if (info->index_size) {
3867 if (!info->take_index_buffer_ownership) {
3868 tc_set_resource_reference(&p->info.index.resource,
3869 info->index.resource);
3870 }
3871 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3872 }
3873 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3874 /* u_threaded_context stores start/count in min/max_index for single draws. */
3875 p->info.min_index = draws[0].start;
3876 p->info.max_index = draws[0].count;
3877 p->index_bias = draws[0].index_bias;
3878 simplify_draw_info(&p->info);
3879 }
3880
3881 /* Single draw with drawid_offset > 0. */
3882 static void
3883 tc_draw_single_draw_id(struct pipe_context *_pipe,
3884 const struct pipe_draw_info *info,
3885 unsigned drawid_offset,
3886 const struct pipe_draw_indirect_info *indirect,
3887 const struct pipe_draw_start_count_bias *draws,
3888 unsigned num_draws)
3889 {
3890 struct threaded_context *tc = threaded_context(_pipe);
3891 struct tc_draw_single *p =
3892 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3893
3894 if (info->index_size) {
3895 if (!info->take_index_buffer_ownership) {
3896 tc_set_resource_reference(&p->info.index.resource,
3897 info->index.resource);
3898 }
3899 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3900 }
3901 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3902 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3903 /* u_threaded_context stores start/count in min/max_index for single draws. */
3904 p->info.min_index = draws[0].start;
3905 p->info.max_index = draws[0].count;
3906 p->index_bias = draws[0].index_bias;
3907 simplify_draw_info(&p->info);
3908 }
3909
3910 /* Single draw with user indices and drawid_offset == 0. */
3911 static void
3912 tc_draw_user_indices_single(struct pipe_context *_pipe,
3913 const struct pipe_draw_info *info,
3914 unsigned drawid_offset,
3915 const struct pipe_draw_indirect_info *indirect,
3916 const struct pipe_draw_start_count_bias *draws,
3917 unsigned num_draws)
3918 {
3919 struct threaded_context *tc = threaded_context(_pipe);
3920 unsigned index_size = info->index_size;
3921 unsigned size = draws[0].count * index_size;
3922 struct pipe_resource *buffer = NULL;
3923 unsigned offset;
3924
3925 if (!size)
3926 return;
3927
3928 /* This must be done before adding draw_vbo, because the upload could
3929 * generate e.g. transfer_unmap and flush a partially-initialized draw_vbo
3930 * call to the driver if it were done afterwards.
3931 */
3932 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3933 (uint8_t*)info->index.user + draws[0].start * index_size,
3934 &offset, &buffer);
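/* u_upload_data copies the user indices for draws[0] into a GPU-visible
 * buffer; "offset" is the byte offset of that copy and is converted into an
 * element index below.
 */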
3935 if (unlikely(!buffer))
3936 return;
3937
3938 struct tc_draw_single *p =
3939 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3940 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3941 p->info.index.resource = buffer;
3942 /* u_threaded_context stores start/count in min/max_index for single draws. */
3943 p->info.min_index = offset >> util_logbase2(index_size);
3944 p->info.max_index = draws[0].count;
3945 p->index_bias = draws[0].index_bias;
3946 simplify_draw_info(&p->info);
3947 }
3948
3949 /* Single draw with user indices and drawid_offset > 0. */
3950 static void
3951 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3952 const struct pipe_draw_info *info,
3953 unsigned drawid_offset,
3954 const struct pipe_draw_indirect_info *indirect,
3955 const struct pipe_draw_start_count_bias *draws,
3956 unsigned num_draws)
3957 {
3958 struct threaded_context *tc = threaded_context(_pipe);
3959 unsigned index_size = info->index_size;
3960 unsigned size = draws[0].count * index_size;
3961 struct pipe_resource *buffer = NULL;
3962 unsigned offset;
3963
3964 if (!size)
3965 return;
3966
3967 /* This must be done before adding draw_vbo, because the upload could
3968 * generate e.g. transfer_unmap and flush a partially-initialized draw_vbo
3969 * call to the driver if it were done afterwards.
3970 */
3971 u_upload_data(tc->base.stream_uploader, 0, size, 4,
3972 (uint8_t*)info->index.user + draws[0].start * index_size,
3973 &offset, &buffer);
3974 if (unlikely(!buffer))
3975 return;
3976
3977 struct tc_draw_single *p =
3978 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3979 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3980 p->info.index.resource = buffer;
3981 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3982 /* u_threaded_context stores start/count in min/max_index for single draws. */
3983 p->info.min_index = offset >> util_logbase2(index_size);
3984 p->info.max_index = draws[0].count;
3985 p->index_bias = draws[0].index_bias;
3986 simplify_draw_info(&p->info);
3987 }
3988
3989 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
3990 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
3991
3992 #define SLOTS_FOR_ONE_DRAW \
3993 DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
3994 sizeof(struct tc_call_base))
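/* SLOTS_FOR_ONE_DRAW is the minimum batch space needed for a tc_draw_multi
 * header plus one pipe_draw_start_count_bias, rounded up to whole
 * tc_call_base slots; the multi-draw paths use it to decide whether another
 * chunk still fits in the current batch.
 */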
3995
3996 static void
3997 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3998 unsigned drawid_offset,
3999 const struct pipe_draw_indirect_info *indirect,
4000 const struct pipe_draw_start_count_bias *draws,
4001 unsigned num_draws)
4002 {
4003 struct threaded_context *tc = threaded_context(_pipe);
4004 int total_offset = 0;
4005 bool take_index_buffer_ownership = info->take_index_buffer_ownership;
4006
4007 while (num_draws) {
4008 struct tc_batch *next = &tc->batch_slots[tc->next];
4009
4010 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4011 /* If there isn't enough room for even one draw, start filling the next batch */
4012 if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4013 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
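/* Assumption: when the current batch cannot fit even one draw,
 * tc_add_slot_based_call starts a new batch, so size the chunk for a fresh
 * batch instead.
 */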
4014 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4015
4016 /* How many draws can we fit in the current batch */
4017 const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4018 ONE_DRAW_SLOT_BYTES);
4019
4020 /* Non-indexed call or indexed with a real index buffer. */
4021 struct tc_draw_multi *p =
4022 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4023 dr);
4024 if (info->index_size) {
4025 if (!take_index_buffer_ownership) {
4026 tc_set_resource_reference(&p->info.index.resource,
4027 info->index.resource);
4028 }
4029 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
4030 }
4031 take_index_buffer_ownership = false;
4032 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4033 p->num_draws = dr;
4034 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4035 num_draws -= dr;
4036
4037 total_offset += dr;
4038 }
4039 }
4040
4041 static void
4042 tc_draw_user_indices_multi(struct pipe_context *_pipe,
4043 const struct pipe_draw_info *info,
4044 unsigned drawid_offset,
4045 const struct pipe_draw_indirect_info *indirect,
4046 const struct pipe_draw_start_count_bias *draws,
4047 unsigned num_draws)
4048 {
4049 struct threaded_context *tc = threaded_context(_pipe);
4050 struct pipe_resource *buffer = NULL;
4051 unsigned buffer_offset, total_count = 0;
4052 unsigned index_size_shift = util_logbase2(info->index_size);
4053 uint8_t *ptr = NULL;
4054
4055 /* Get the total count. */
4056 for (unsigned i = 0; i < num_draws; i++)
4057 total_count += draws[i].count;
4058
4059 if (!total_count)
4060 return;
4061
4062 /* Allocate space for all index buffers.
4063 *
4064 * This must be done before adding draw_vbo, because the upload could
4065 * generate e.g. transfer_unmap and flush a partially-initialized draw_vbo
4066 * call to the driver if it were done afterwards.
4067 */
4068 u_upload_alloc(tc->base.stream_uploader, 0,
4069 total_count << index_size_shift, 4,
4070 &buffer_offset, &buffer, (void**)&ptr);
4071 if (unlikely(!buffer))
4072 return;
4073
4074 int total_offset = 0;
4075 unsigned offset = 0;
4076 while (num_draws) {
4077 struct tc_batch *next = &tc->batch_slots[tc->next];
4078
4079 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4080 /* If there isn't enough room for even one draw, start filling the next batch */
4081 if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4082 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4083 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4084
4085 /* How many draws can we fit in the current batch */
4086 const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4087 ONE_DRAW_SLOT_BYTES);
4088
4089 struct tc_draw_multi *p =
4090 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4091 dr);
4092 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4093
4094 if (total_offset == 0)
4095 /* the first slot inherits the reference from u_upload_alloc() */
4096 p->info.index.resource = buffer;
4097 else
4098 /* all following slots need a new reference */
4099 tc_set_resource_reference(&p->info.index.resource, buffer);
4100
4101 p->num_draws = dr;
4102
4103 /* Upload index buffers. */
4104 for (unsigned i = 0; i < dr; i++) {
4105 unsigned count = draws[i + total_offset].count;
4106
4107 if (!count) {
4108 p->slot[i].start = 0;
4109 p->slot[i].count = 0;
4110 p->slot[i].index_bias = 0;
4111 continue;
4112 }
4113
4114 unsigned size = count << index_size_shift;
4115 memcpy(ptr + offset,
4116 (uint8_t*)info->index.user +
4117 (draws[i + total_offset].start << index_size_shift), size);
4118 p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4119 p->slot[i].count = count;
4120 p->slot[i].index_bias = draws[i + total_offset].index_bias;
4121 offset += size;
4122 }
4123
4124 total_offset += dr;
4125 num_draws -= dr;
4126 }
4127 }
4128
4129 static void
4130 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4131 unsigned drawid_offset,
4132 const struct pipe_draw_indirect_info *indirect,
4133 const struct pipe_draw_start_count_bias *draws,
4134 unsigned num_draws)
4135 {
4136 struct threaded_context *tc = threaded_context(_pipe);
4137 assert(!info->has_user_indices);
4138 assert(num_draws == 1);
4139
4140 struct tc_draw_indirect *p =
4141 tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4142 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4143
4144 if (info->index_size) {
4145 if (!info->take_index_buffer_ownership) {
4146 tc_set_resource_reference(&p->info.index.resource,
4147 info->index.resource);
4148 }
4149 tc_add_to_buffer_list(next, info->index.resource);
4150 }
4151 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4152
4153 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4154 tc_set_resource_reference(&p->indirect.indirect_draw_count,
4155 indirect->indirect_draw_count);
4156 p->indirect.count_from_stream_output = NULL;
4157 pipe_so_target_reference(&p->indirect.count_from_stream_output,
4158 indirect->count_from_stream_output);
4159
4160 if (indirect->buffer)
4161 tc_add_to_buffer_list(next, indirect->buffer);
4162 if (indirect->indirect_draw_count)
4163 tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4164 if (indirect->count_from_stream_output)
4165 tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4166
4167 memcpy(&p->indirect, indirect, sizeof(*indirect));
4168 p->draw.start = draws[0].start;
4169 }
4170
4171 /* Dispatch table for tc_draw_vbo:
4172 *
4173 * Indexed by:
4174 * [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4175 * is_multi_draw * 2 + non_zero_draw_id]
4176 */
4177 static pipe_draw_func draw_funcs[16] = {
4178 tc_draw_single,
4179 tc_draw_single_draw_id,
4180 tc_draw_multi,
4181 tc_draw_multi,
4182 tc_draw_user_indices_single,
4183 tc_draw_user_indices_single_draw_id,
4184 tc_draw_user_indices_multi,
4185 tc_draw_user_indices_multi,
4186 tc_draw_indirect,
4187 tc_draw_indirect,
4188 tc_draw_indirect,
4189 tc_draw_indirect,
4190 tc_draw_indirect,
4191 tc_draw_indirect,
4192 tc_draw_indirect,
4193 tc_draw_indirect,
4194 };
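/* Illustrative example: a direct draw with user indices, num_draws > 1 and
 * drawid_offset == 0 yields index 0*8 + 1*4 + 1*2 + 0 = 6, which selects
 * tc_draw_user_indices_multi. All indirect draws (index >= 8) map to
 * tc_draw_indirect.
 */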
4195
4196 void
4197 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4198 unsigned drawid_offset,
4199 const struct pipe_draw_indirect_info *indirect,
4200 const struct pipe_draw_start_count_bias *draws,
4201 unsigned num_draws)
4202 {
4203 STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4204 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
4205
4206 struct threaded_context *tc = threaded_context(_pipe);
4207 if (tc->options.parse_renderpass_info)
4208 tc_parse_draw(tc);
4209
4210 /* Use a function table to call the desired variant of draw_vbo. */
4211 unsigned index = (indirect != NULL) * 8 +
4212 (info->index_size && info->has_user_indices) * 4 +
4213 (num_draws > 1) * 2 + (drawid_offset != 0);
4214 draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4215
4216 /* This must be after tc_add_*call, which can flush the batch. */
4217 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4218 tc_add_all_gfx_bindings_to_buffer_list(tc);
4219 }
4220
4221 struct tc_draw_single *
4222 tc_add_draw_single_call(struct pipe_context *_pipe,
4223 struct pipe_resource *index_bo)
4224 {
4225 struct threaded_context *tc = threaded_context(_pipe);
4226
4227 if (tc->options.parse_renderpass_info)
4228 tc_parse_draw(tc);
4229
4230 struct tc_draw_single *p =
4231 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4232
4233 if (index_bo)
4234 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4235
4236 /* This must be after tc_add_*call, which can flush the batch. */
4237 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4238 tc_add_all_gfx_bindings_to_buffer_list(tc);
4239
4240 return p;
4241 }
4242
4243 struct tc_draw_vstate_single {
4244 struct tc_call_base base;
4245 struct pipe_draw_start_count_bias draw;
4246
4247 /* The following states must be together without holes because they are
4248 * compared by draw merging.
4249 */
4250 struct pipe_vertex_state *state;
4251 uint32_t partial_velem_mask;
4252 struct pipe_draw_vertex_state_info info;
4253 };
4254
4255 static bool
4256 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4257 struct tc_draw_vstate_single *next)
4258 {
4259 if (next->base.call_id != TC_CALL_draw_vstate_single)
4260 return false;
4261
4262 return !memcmp(&first->state, &next->state,
4263 offsetof(struct tc_draw_vstate_single, info) +
4264 sizeof(struct pipe_draw_vertex_state_info) -
4265 offsetof(struct tc_draw_vstate_single, state));
4266 }
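/* The memcmp above spans state, partial_velem_mask and info as one contiguous
 * block (hence the "no holes" requirement in tc_draw_vstate_single), so draws
 * merge only when they share the same vertex state, element mask and mode.
 */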
4267
4268 static uint16_t
4269 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4270 {
4271 /* Draw call merging. */
4272 struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4273 struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4274
4275 /* If at least 2 consecutive draw calls can be merged... */
4276 if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4277 /* The maximum number of merged draws is given by the batch size. */
4278 struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4279 call_size(tc_draw_vstate_single)];
4280 unsigned num_draws = 2;
4281
4282 draws[0] = first->draw;
4283 draws[1] = next->draw;
4284
4285 /* Find how many other draws can be merged. */
4286 next = get_next_call(next, tc_draw_vstate_single);
4287 for (; is_next_call_a_mergeable_draw_vstate(first, next);
4288 next = get_next_call(next, tc_draw_vstate_single),
4289 num_draws++)
4290 draws[num_draws] = next->draw;
4291
4292 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4293 first->info, draws, num_draws);
4294 /* Since all draws use the same state, drop all references at once. */
4295 tc_drop_vertex_state_references(first->state, num_draws);
4296
4297 return call_size(tc_draw_vstate_single) * num_draws;
4298 }
4299
4300 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4301 first->info, &first->draw, 1);
4302 tc_drop_vertex_state_references(first->state, 1);
4303 return call_size(tc_draw_vstate_single);
4304 }
4305
4306 struct tc_draw_vstate_multi {
4307 struct tc_call_base base;
4308 uint32_t partial_velem_mask;
4309 struct pipe_draw_vertex_state_info info;
4310 unsigned num_draws;
4311 struct pipe_vertex_state *state;
4312 struct pipe_draw_start_count_bias slot[0];
4313 };
4314
4315 static uint16_t
4316 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4317 {
4318 struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4319
4320 pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4321 info->info, info->slot, info->num_draws);
4322 tc_drop_vertex_state_references(info->state, 1);
4323 return info->base.num_slots;
4324 }
4325
4326 static void
4327 tc_draw_vertex_state(struct pipe_context *_pipe,
4328 struct pipe_vertex_state *state,
4329 uint32_t partial_velem_mask,
4330 struct pipe_draw_vertex_state_info info,
4331 const struct pipe_draw_start_count_bias *draws,
4332 unsigned num_draws)
4333 {
4334 struct threaded_context *tc = threaded_context(_pipe);
4335 if (tc->options.parse_renderpass_info)
4336 tc_parse_draw(tc);
4337
4338 if (num_draws == 1) {
4339 /* Single draw. */
4340 struct tc_draw_vstate_single *p =
4341 tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4342 p->partial_velem_mask = partial_velem_mask;
4343 p->draw = draws[0];
4344 p->info.mode = info.mode;
4345 p->info.take_vertex_state_ownership = false;
4346
4347 /* This should always be 0 for simplicity because we assume that
4348 * index_bias doesn't vary.
4349 */
4350 assert(draws[0].index_bias == 0);
4351
4352 if (!info.take_vertex_state_ownership)
4353 tc_set_vertex_state_reference(&p->state, state);
4354 else
4355 p->state = state;
4356
4357
4358 /* This must be after tc_add_*call, which can flush the batch. */
4359 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4360 tc_add_all_gfx_bindings_to_buffer_list(tc);
4361 return;
4362 }
4363
4364 const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4365 const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4366 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4367 sizeof(struct tc_call_base));
4368 /* Multi draw. */
4369 int total_offset = 0;
4370 bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4371 while (num_draws) {
4372 struct tc_batch *next = &tc->batch_slots[tc->next];
4373
4374 int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4375 /* If there isn't enough room for even one draw, start filling the next batch */
4376 if (nb_slots_left < slots_for_one_draw)
4377 nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4378 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4379
4380 /* How many draws can we fit in the current batch */
4381 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4382
4383 /* Non-indexed call or indexed with a real index buffer. */
4384 struct tc_draw_vstate_multi *p =
4385 tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4386
4387 if (!take_vertex_state_ownership)
4388 tc_set_vertex_state_reference(&p->state, state);
4389 else
4390 p->state = state;
4391
4392 take_vertex_state_ownership = false;
4393 p->partial_velem_mask = partial_velem_mask;
4394 p->info.mode = info.mode;
4395 p->info.take_vertex_state_ownership = false;
4396 p->num_draws = dr;
4397 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4398 num_draws -= dr;
4399
4400 total_offset += dr;
4401 }
4402
4403
4404 /* This must be after tc_add_*call, which can flush the batch. */
4405 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4406 tc_add_all_gfx_bindings_to_buffer_list(tc);
4407 }
4408
4409 struct tc_launch_grid_call {
4410 struct tc_call_base base;
4411 struct pipe_grid_info info;
4412 };
4413
4414 static uint16_t
4415 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4416 {
4417 struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4418
4419 pipe->launch_grid(pipe, p);
4420 tc_drop_resource_reference(p->indirect);
4421 return call_size(tc_launch_grid_call);
4422 }
4423
4424 static void
4425 tc_launch_grid(struct pipe_context *_pipe,
4426 const struct pipe_grid_info *info)
4427 {
4428 struct threaded_context *tc = threaded_context(_pipe);
4429 struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4430 tc_launch_grid_call);
4431 assert(info->input == NULL);
4432
4433 tc_set_resource_reference(&p->info.indirect, info->indirect);
4434 memcpy(&p->info, info, sizeof(*info));
4435
4436 if (info->indirect)
4437 tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4438
4439 /* This must be after tc_add_*call, which can flush the batch. */
4440 if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4441 tc_add_all_compute_bindings_to_buffer_list(tc);
4442 }
4443
4444 static uint16_t
4445 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4446 {
4447 struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4448
4449 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4450 p->dstz, p->src, p->src_level, &p->src_box);
4451 tc_drop_resource_reference(p->dst);
4452 tc_drop_resource_reference(p->src);
4453 return call_size(tc_resource_copy_region);
4454 }
4455
4456 static void
4457 tc_resource_copy_region(struct pipe_context *_pipe,
4458 struct pipe_resource *dst, unsigned dst_level,
4459 unsigned dstx, unsigned dsty, unsigned dstz,
4460 struct pipe_resource *src, unsigned src_level,
4461 const struct pipe_box *src_box)
4462 {
4463 struct threaded_context *tc = threaded_context(_pipe);
4464 struct threaded_resource *tdst = threaded_resource(dst);
4465 struct tc_resource_copy_region *p =
4466 tc_add_call(tc, TC_CALL_resource_copy_region,
4467 tc_resource_copy_region);
4468
4469 if (dst->target == PIPE_BUFFER)
4470 tc_buffer_disable_cpu_storage(dst);
4471
4472 tc_set_resource_batch_usage(tc, dst);
4473 tc_set_resource_reference(&p->dst, dst);
4474 p->dst_level = dst_level;
4475 p->dstx = dstx;
4476 p->dsty = dsty;
4477 p->dstz = dstz;
4478 tc_set_resource_batch_usage(tc, src);
4479 tc_set_resource_reference(&p->src, src);
4480 p->src_level = src_level;
4481 p->src_box = *src_box;
4482
4483 if (dst->target == PIPE_BUFFER) {
4484 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4485
4486 tc_add_to_buffer_list(next, src);
4487 tc_add_to_buffer_list(next, dst);
4488
4489 util_range_add(&tdst->b, &tdst->valid_buffer_range,
4490 dstx, dstx + src_box->width);
4491 }
4492 }
4493
4494 struct tc_blit_call {
4495 struct tc_call_base base;
4496 struct pipe_blit_info info;
4497 };
4498
4499 static uint16_t
4500 tc_call_blit(struct pipe_context *pipe, void *call)
4501 {
4502 struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4503
4504 pipe->blit(pipe, blit);
4505 tc_drop_resource_reference(blit->dst.resource);
4506 tc_drop_resource_reference(blit->src.resource);
4507 return call_size(tc_blit_call);
4508 }
4509
4510 static void
4511 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4512 {
4513 struct threaded_context *tc = threaded_context(_pipe);
4514 struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4515
4516 tc_set_resource_batch_usage(tc, info->dst.resource);
4517 tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4518 tc_set_resource_batch_usage(tc, info->src.resource);
4519 tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4520 memcpy(&blit->info, info, sizeof(*info));
4521 if (tc->options.parse_renderpass_info) {
4522 tc->renderpass_info_recording->has_resolve = info->src.resource->nr_samples > 1 &&
4523 info->dst.resource->nr_samples <= 1 &&
4524 tc->fb_resolve == info->dst.resource;
4525 }
4526 }
4527
4528 struct tc_generate_mipmap {
4529 struct tc_call_base base;
4530 enum pipe_format format;
4531 unsigned base_level;
4532 unsigned last_level;
4533 unsigned first_layer;
4534 unsigned last_layer;
4535 struct pipe_resource *res;
4536 };
4537
4538 static uint16_t
tc_call_generate_mipmap(struct pipe_context * pipe,void * call)4539 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4540 {
4541 struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4542 ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4543 p->base_level,
4544 p->last_level,
4545 p->first_layer,
4546 p->last_layer);
4547 assert(result);
4548 tc_drop_resource_reference(p->res);
4549 return call_size(tc_generate_mipmap);
4550 }
4551
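/* The format-support query below runs synchronously on the application
 * thread (pipe_screen entry points are expected to be thread-safe), so
 * unsupported formats can be rejected immediately; only the actual mipmap
 * generation is deferred to the driver thread.
 */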
static bool
tc_generate_mipmap(struct pipe_context *_pipe,
                   struct pipe_resource *res,
                   enum pipe_format format,
                   unsigned base_level,
                   unsigned last_level,
                   unsigned first_layer,
                   unsigned last_layer)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;
   struct pipe_screen *screen = pipe->screen;
   unsigned bind = PIPE_BIND_SAMPLER_VIEW;

   if (util_format_is_depth_or_stencil(format))
      bind |= PIPE_BIND_DEPTH_STENCIL;
   else
      bind |= PIPE_BIND_RENDER_TARGET;

   if (!screen->is_format_supported(screen, format, res->target,
                                    res->nr_samples, res->nr_storage_samples,
                                    bind))
      return false;

   struct tc_generate_mipmap *p =
      tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);

   tc_set_resource_batch_usage(tc, res);
   tc_set_resource_reference(&p->res, res);
   p->format = format;
   p->base_level = base_level;
   p->last_level = last_level;
   p->first_layer = first_layer;
   p->last_layer = last_layer;
   return true;
}

struct tc_resource_call {
   struct tc_call_base base;
   struct pipe_resource *resource;
};

static uint16_t
tc_call_flush_resource(struct pipe_context *pipe, void *call)
{
   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;

   pipe->flush_resource(pipe, resource);
   tc_drop_resource_reference(resource);
   return call_size(tc_resource_call);
}

static void
tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
                                               tc_resource_call);

   tc_set_resource_batch_usage(tc, resource);
   tc_set_resource_reference(&call->resource, resource);
}

static uint16_t
tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
{
   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;

   pipe->invalidate_resource(pipe, resource);
   tc_drop_resource_reference(resource);
   return call_size(tc_resource_call);
}

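/* Buffer invalidation is handled directly on the application thread
 * (tc_invalidate_buffer); texture invalidation is deferred, and when it
 * targets a bound framebuffer attachment the invalidation is recorded in
 * the renderpass info (cbuf_invalidate / zsbuf_invalidate).
 */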
static void
tc_invalidate_resource(struct pipe_context *_pipe,
                       struct pipe_resource *resource)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (resource->target == PIPE_BUFFER) {
      tc_invalidate_buffer(tc, threaded_resource(resource));
      return;
   }

   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
                                               tc_resource_call);
   tc_set_resource_batch_usage(tc, resource);
   tc_set_resource_reference(&call->resource, resource);

   struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
   if (info) {
      if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
         info->zsbuf_invalidate = true;
      } else {
         for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
            if (tc->fb_resources[i] == resource)
               info->cbuf_invalidate |= BITFIELD_BIT(i);
         }
      }
   }
}

struct tc_clear {
   struct tc_call_base base;
   bool scissor_state_set;
   uint8_t stencil;
   uint16_t buffers;
   float depth;
   struct pipe_scissor_state scissor_state;
   union pipe_color_union color;
};

static uint16_t
tc_call_clear(struct pipe_context *pipe, void *call)
{
   struct tc_clear *p = to_call(call, tc_clear);

   pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
   return call_size(tc_clear);
}

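/* Clears feed the renderpass-info tracker: an unscissored clear issued
 * before any draw can become a "clear" load op (cbuf_clear / zsbuf_clear),
 * while scissored depth/stencil clears and clears after draws are recorded
 * as partial so drivers don't drop them.
 */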
static void
tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
         const union pipe_color_union *color, double depth,
         unsigned stencil)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);

   p->buffers = buffers;
   if (scissor_state) {
      p->scissor_state = *scissor_state;
      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
      /* partial clear info is useful for drivers to know whether any zs writes occur;
       * drivers are responsible for optimizing partial clear -> full clear
       */
      if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
         info->zsbuf_clear_partial |= !info->zsbuf_clear;
   } else {
      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
      if (info) {
         /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
         info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
         if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
            if (!info->zsbuf_load && !info->zsbuf_clear_partial)
               info->zsbuf_clear = true;
            else if (!info->zsbuf_clear)
               /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
               info->zsbuf_clear_partial = true;
         }
      }
   }
   p->scissor_state_set = !!scissor_state;
   p->color = *color;
   p->depth = depth;
   p->stencil = stencil;
}

struct tc_clear_render_target {
   struct tc_call_base base;
   bool render_condition_enabled;
   unsigned dstx;
   unsigned dsty;
   unsigned width;
   unsigned height;
   union pipe_color_union color;
   struct pipe_surface *dst;
};

static uint16_t
tc_call_clear_render_target(struct pipe_context *pipe, void *call)
{
   struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);

   pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
                             p->render_condition_enabled);
   tc_drop_surface_reference(p->dst);
   return call_size(tc_clear_render_target);
}

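/* Surface-based clears pin the pipe_surface with a reference that is
 * released after the call executes on the driver thread; the same pattern
 * is used for clear_depth_stencil below.
 */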
static void
tc_clear_render_target(struct pipe_context *_pipe,
                       struct pipe_surface *dst,
                       const union pipe_color_union *color,
                       unsigned dstx, unsigned dsty,
                       unsigned width, unsigned height,
                       bool render_condition_enabled)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
   p->dst = NULL;
   pipe_surface_reference(&p->dst, dst);
   p->color = *color;
   p->dstx = dstx;
   p->dsty = dsty;
   p->width = width;
   p->height = height;
   p->render_condition_enabled = render_condition_enabled;
}


struct tc_clear_depth_stencil {
   struct tc_call_base base;
   bool render_condition_enabled;
   float depth;
   unsigned clear_flags;
   unsigned stencil;
   unsigned dstx;
   unsigned dsty;
   unsigned width;
   unsigned height;
   struct pipe_surface *dst;
};


static uint16_t
tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
{
   struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);

   pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
                             p->dstx, p->dsty, p->width, p->height,
                             p->render_condition_enabled);
   tc_drop_surface_reference(p->dst);
   return call_size(tc_clear_depth_stencil);
}

static void
tc_clear_depth_stencil(struct pipe_context *_pipe,
                       struct pipe_surface *dst, unsigned clear_flags,
                       double depth, unsigned stencil, unsigned dstx,
                       unsigned dsty, unsigned width, unsigned height,
                       bool render_condition_enabled)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
   p->dst = NULL;
   pipe_surface_reference(&p->dst, dst);
   p->clear_flags = clear_flags;
   p->depth = depth;
   p->stencil = stencil;
   p->dstx = dstx;
   p->dsty = dsty;
   p->width = width;
   p->height = height;
   p->render_condition_enabled = render_condition_enabled;
}

struct tc_clear_buffer {
   struct tc_call_base base;
   uint8_t clear_value_size;
   unsigned offset;
   unsigned size;
   char clear_value[16];
   struct pipe_resource *res;
};

static uint16_t
tc_call_clear_buffer(struct pipe_context *pipe, void *call)
{
   struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);

   pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
                      p->clear_value_size);
   tc_drop_resource_reference(p->res);
   return call_size(tc_clear_buffer);
}

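/* clear_buffer bypasses any CPU-side shadow storage and extends the valid
 * range, so later readbacks know the cleared region holds defined data.
 */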
static void
tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
                unsigned offset, unsigned size,
                const void *clear_value, int clear_value_size)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_resource *tres = threaded_resource(res);
   struct tc_clear_buffer *p =
      tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);

   tc_buffer_disable_cpu_storage(res);

   tc_set_resource_reference(&p->res, res);
   tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
   p->offset = offset;
   p->size = size;
   memcpy(p->clear_value, clear_value, clear_value_size);
   p->clear_value_size = clear_value_size;

   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
}

struct tc_clear_texture {
   struct tc_call_base base;
   unsigned level;
   struct pipe_box box;
   char data[16];
   struct pipe_resource *res;
};

static uint16_t
tc_call_clear_texture(struct pipe_context *pipe, void *call)
{
   struct tc_clear_texture *p = to_call(call, tc_clear_texture);

   pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
   tc_drop_resource_reference(p->res);
   return call_size(tc_clear_texture);
}

static void
tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
                 unsigned level, const struct pipe_box *box, const void *data)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear_texture *p =
      tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);

   tc_set_resource_batch_usage(tc, res);
   tc_set_resource_reference(&p->res, res);
   p->level = level;
   p->box = *box;
   memcpy(p->data, data,
          util_format_get_blocksize(res->format));
}

struct tc_resource_commit {
   struct tc_call_base base;
   bool commit;
   unsigned level;
   struct pipe_box box;
   struct pipe_resource *res;
};

static uint16_t
tc_call_resource_commit(struct pipe_context *pipe, void *call)
{
   struct tc_resource_commit *p = to_call(call, tc_resource_commit);

   pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
   tc_drop_resource_reference(p->res);
   return call_size(tc_resource_commit);
}

static bool
tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
                   unsigned level, struct pipe_box *box, bool commit)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_resource_commit *p =
      tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);

   tc_set_resource_reference(&p->res, res);
   tc_set_resource_batch_usage(tc, res);
   p->level = level;
   p->box = *box;
   p->commit = commit;
   return true; /* we don't care about the return value for this call */
}

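/* Intel performance queries: begin/end are queued like other calls, but the
 * entry points that read results, wait, or delete a query call tc_sync()
 * first, since the driver thread may still have begin/end calls pending.
 */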
static unsigned
tc_init_intel_perf_query_info(struct pipe_context *_pipe)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->init_intel_perf_query_info(pipe);
}

static void
tc_get_intel_perf_query_info(struct pipe_context *_pipe,
                             unsigned query_index,
                             const char **name,
                             uint32_t *data_size,
                             uint32_t *n_counters,
                             uint32_t *n_active)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
   pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
                                   n_counters, n_active);
}

static void
tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
                                     unsigned query_index,
                                     unsigned counter_index,
                                     const char **name,
                                     const char **desc,
                                     uint32_t *offset,
                                     uint32_t *data_size,
                                     uint32_t *type_enum,
                                     uint32_t *data_type_enum,
                                     uint64_t *raw_max)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
                                           name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
}

static struct pipe_query *
tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->new_intel_perf_query_obj(pipe, query_index);
}

static uint16_t
tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
{
   (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static bool
tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;

   /* assume success, begin failure can be signaled from get_intel_perf_query_data */
   return true;
}

static uint16_t
tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
{
   pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static void
tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
}

static void
tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   pipe->delete_intel_perf_query(pipe, q);
}

static void
tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   pipe->wait_intel_perf_query(pipe, q);
}

static bool
tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   return pipe->is_intel_perf_query_ready(pipe, q);
}

static bool
tc_get_intel_perf_query_data(struct pipe_context *_pipe,
                             struct pipe_query *q,
                             size_t data_size,
                             uint32_t *data,
                             uint32_t *bytes_written)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
}

/********************************************************************
 * callback
 */

struct tc_callback_call {
   struct tc_call_base base;
   void (*fn)(void *data);
   void *data;
};

static uint16_t
tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
{
   struct tc_callback_call *p = to_call(call, tc_callback_call);

   p->fn(p->data);
   return call_size(tc_callback_call);
}

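/* If "asap" is set and tc currently has nothing queued (tc_is_sync), the
 * callback runs immediately on the application thread; otherwise it is
 * queued and runs on the driver thread when the batch is executed.
 */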
static void
tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
            bool asap)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (asap && tc_is_sync(tc)) {
      fn(data);
      return;
   }

   struct tc_callback_call *p =
      tc_add_call(tc, TC_CALL_callback, tc_callback_call);
   p->fn = fn;
   p->data = data;
}


/********************************************************************
 * create & destroy
 */

static void
tc_destroy(struct pipe_context *_pipe)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   if (tc->base.const_uploader &&
       tc->base.stream_uploader != tc->base.const_uploader)
      u_upload_destroy(tc->base.const_uploader);

   if (tc->base.stream_uploader)
      u_upload_destroy(tc->base.stream_uploader);

   tc_sync(tc);

   if (util_queue_is_initialized(&tc->queue)) {
      util_queue_destroy(&tc->queue);

      for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
         util_queue_fence_destroy(&tc->batch_slots[i].fence);
         util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
         assert(!tc->batch_slots[i].token);
      }
   }

   slab_destroy_child(&tc->pool_transfers);
   assert(tc->batch_slots[tc->next].num_total_slots == 0);
   pipe->destroy(pipe);

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
         util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
      util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
      pipe_resource_reference(&tc->fb_resources[i], NULL);
   pipe_resource_reference(&tc->fb_resolve, NULL);

   FREE(tc);
}

void tc_driver_internal_flush_notify(struct threaded_context *tc)
{
   /* Allow drivers to call this function even for internal contexts that
    * don't have tc. It simplifies drivers.
    */
   if (!tc)
      return;

   /* Signal fences set by tc_batch_execute. */
   for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
      util_queue_fence_signal(tc->signal_fences_next_flush[i]);

   tc->num_signal_fences_next_flush = 0;
}

/**
 * Wrap an existing pipe_context into a threaded_context.
 *
 * \param pipe                 pipe_context to wrap
 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
 *                             transfer objects; the driver should have one
 *                             in pipe_screen.
 * \param replace_buffer  callback for replacing a pipe_resource's storage
 *                        with another pipe_resource's storage.
 * \param options         optional TC options/callbacks
 * \param out  if successful, the threaded_context will be returned here in
 *             addition to the return value if "out" != NULL
 */
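/* Illustrative usage (the names below are hypothetical, not part of this
 * file): a driver's context-creation path would typically end with
 *
 *    struct threaded_context *tc;
 *    struct pipe_context *wrapped =
 *       threaded_context_create(my_pipe, &my_screen->transfer_pool,
 *                               my_replace_buffer_storage, NULL, &tc);
 *
 * and hand "wrapped" (which may be the original context if threading is
 * disabled) to the state tracker.
 */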
struct pipe_context *
threaded_context_create(struct pipe_context *pipe,
                        struct slab_parent_pool *parent_transfer_pool,
                        tc_replace_buffer_storage_func replace_buffer,
                        const struct threaded_context_options *options,
                        struct threaded_context **out)
{
   struct threaded_context *tc;

   if (!pipe)
      return NULL;

   if (!debug_get_bool_option("GALLIUM_THREAD", true))
      return pipe;

   tc = CALLOC_STRUCT(threaded_context);
   if (!tc) {
      pipe->destroy(pipe);
      return NULL;
   }

   if (options) {
      /* this is unimplementable */
      assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
      tc->options = *options;
   }

   pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);

   /* The driver context isn't wrapped, so set its "priv" to NULL. */
   pipe->priv = NULL;

   tc->pipe = pipe;
   tc->replace_buffer_storage = replace_buffer;
   tc->map_buffer_alignment =
      pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
   tc->ubo_alignment =
      MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
   tc->base.priv = pipe; /* priv points to the wrapped driver context */
   tc->base.screen = pipe->screen;
   tc->base.destroy = tc_destroy;
   tc->base.callback = tc_callback;

   tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
   if (pipe->stream_uploader == pipe->const_uploader)
      tc->base.const_uploader = tc->base.stream_uploader;
   else
      tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);

   if (!tc->base.stream_uploader || !tc->base.const_uploader)
      goto fail;

   tc->use_forced_staging_uploads = true;

   /* The queue size is the number of batches "waiting". Batches are removed
    * from the queue before being executed, so keep one tc_batch slot for that
    * execution. Also, keep one unused slot for an unflushed batch.
    */
   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
      goto fail;

   tc->last_completed = -1;
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
#if !defined(NDEBUG) && TC_DEBUG >= 1
      tc->batch_slots[i].sentinel = TC_SENTINEL;
#endif
      tc->batch_slots[i].tc = tc;
      tc->batch_slots[i].batch_idx = i;
      util_queue_fence_init(&tc->batch_slots[i].fence);
      tc->batch_slots[i].renderpass_info_idx = -1;
      if (tc->options.parse_renderpass_info) {
         util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
         tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
      }
   }
   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
      util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);

   list_inithead(&tc->unflushed_queries);

   slab_create_child(&tc->pool_transfers, parent_transfer_pool);

   /* If you have different limits in each shader stage, set the maximum. */
   struct pipe_screen *screen = pipe->screen;
   tc->max_const_buffers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
   tc->max_shader_buffers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
   tc->max_images =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
   tc->max_samplers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);

   tc->base.set_context_param = tc_set_context_param; /* always set this */

#define CTX_INIT(_member) \
   tc->base._member = tc->pipe->_member ? tc_##_member : NULL

   CTX_INIT(flush);
   CTX_INIT(draw_vbo);
   CTX_INIT(draw_vertex_state);
   CTX_INIT(launch_grid);
   CTX_INIT(resource_copy_region);
   CTX_INIT(blit);
   CTX_INIT(clear);
   CTX_INIT(clear_render_target);
   CTX_INIT(clear_depth_stencil);
   CTX_INIT(clear_buffer);
   CTX_INIT(clear_texture);
   CTX_INIT(flush_resource);
   CTX_INIT(generate_mipmap);
   CTX_INIT(render_condition);
   CTX_INIT(create_query);
   CTX_INIT(create_batch_query);
   CTX_INIT(destroy_query);
   CTX_INIT(begin_query);
   CTX_INIT(end_query);
   CTX_INIT(get_query_result);
   CTX_INIT(get_query_result_resource);
   CTX_INIT(set_active_query_state);
   CTX_INIT(create_blend_state);
   CTX_INIT(bind_blend_state);
   CTX_INIT(delete_blend_state);
   CTX_INIT(create_sampler_state);
   CTX_INIT(bind_sampler_states);
   CTX_INIT(delete_sampler_state);
   CTX_INIT(create_rasterizer_state);
   CTX_INIT(bind_rasterizer_state);
   CTX_INIT(delete_rasterizer_state);
   CTX_INIT(create_depth_stencil_alpha_state);
   CTX_INIT(bind_depth_stencil_alpha_state);
   CTX_INIT(delete_depth_stencil_alpha_state);
   CTX_INIT(link_shader);
   CTX_INIT(create_fs_state);
   CTX_INIT(bind_fs_state);
   CTX_INIT(delete_fs_state);
   CTX_INIT(create_vs_state);
   CTX_INIT(bind_vs_state);
   CTX_INIT(delete_vs_state);
   CTX_INIT(create_gs_state);
   CTX_INIT(bind_gs_state);
   CTX_INIT(delete_gs_state);
   CTX_INIT(create_tcs_state);
   CTX_INIT(bind_tcs_state);
   CTX_INIT(delete_tcs_state);
   CTX_INIT(create_tes_state);
   CTX_INIT(bind_tes_state);
   CTX_INIT(delete_tes_state);
   CTX_INIT(create_compute_state);
   CTX_INIT(bind_compute_state);
   CTX_INIT(delete_compute_state);
   CTX_INIT(create_vertex_elements_state);
   CTX_INIT(bind_vertex_elements_state);
   CTX_INIT(delete_vertex_elements_state);
   CTX_INIT(set_blend_color);
   CTX_INIT(set_stencil_ref);
   CTX_INIT(set_sample_mask);
   CTX_INIT(set_min_samples);
   CTX_INIT(set_clip_state);
   CTX_INIT(set_constant_buffer);
   CTX_INIT(set_inlinable_constants);
   CTX_INIT(set_framebuffer_state);
   CTX_INIT(set_polygon_stipple);
   CTX_INIT(set_sample_locations);
   CTX_INIT(set_scissor_states);
   CTX_INIT(set_viewport_states);
   CTX_INIT(set_window_rectangles);
   CTX_INIT(set_sampler_views);
   CTX_INIT(set_tess_state);
   CTX_INIT(set_patch_vertices);
   CTX_INIT(set_shader_buffers);
   CTX_INIT(set_shader_images);
   CTX_INIT(set_vertex_buffers);
   CTX_INIT(create_stream_output_target);
   CTX_INIT(stream_output_target_destroy);
   CTX_INIT(set_stream_output_targets);
   CTX_INIT(create_sampler_view);
   CTX_INIT(sampler_view_destroy);
   CTX_INIT(create_surface);
   CTX_INIT(surface_destroy);
   CTX_INIT(buffer_map);
   CTX_INIT(texture_map);
   CTX_INIT(transfer_flush_region);
   CTX_INIT(buffer_unmap);
   CTX_INIT(texture_unmap);
   CTX_INIT(buffer_subdata);
   CTX_INIT(texture_subdata);
   CTX_INIT(texture_barrier);
   CTX_INIT(memory_barrier);
   CTX_INIT(resource_commit);
   CTX_INIT(create_video_codec);
   CTX_INIT(create_video_buffer);
   CTX_INIT(set_compute_resources);
   CTX_INIT(set_global_binding);
   CTX_INIT(get_sample_position);
   CTX_INIT(invalidate_resource);
   CTX_INIT(get_device_reset_status);
   CTX_INIT(set_device_reset_callback);
   CTX_INIT(dump_debug_state);
   CTX_INIT(set_log_context);
   CTX_INIT(emit_string_marker);
   CTX_INIT(set_debug_callback);
   CTX_INIT(create_fence_fd);
   CTX_INIT(fence_server_sync);
   CTX_INIT(fence_server_signal);
   CTX_INIT(get_timestamp);
   CTX_INIT(create_texture_handle);
   CTX_INIT(delete_texture_handle);
   CTX_INIT(make_texture_handle_resident);
   CTX_INIT(create_image_handle);
   CTX_INIT(delete_image_handle);
   CTX_INIT(make_image_handle_resident);
   CTX_INIT(set_frontend_noop);
   CTX_INIT(init_intel_perf_query_info);
   CTX_INIT(get_intel_perf_query_info);
   CTX_INIT(get_intel_perf_query_counter_info);
   CTX_INIT(new_intel_perf_query_obj);
   CTX_INIT(begin_intel_perf_query);
   CTX_INIT(end_intel_perf_query);
   CTX_INIT(delete_intel_perf_query);
   CTX_INIT(wait_intel_perf_query);
   CTX_INIT(is_intel_perf_query_ready);
   CTX_INIT(get_intel_perf_query_data);
#undef CTX_INIT

#define CALL(name) tc->execute_func[TC_CALL_##name] = tc_call_##name;
#include "u_threaded_context_calls.h"
#undef CALL

   if (out)
      *out = tc;

   tc_begin_next_buffer_list(tc);
   if (tc->options.parse_renderpass_info)
      tc_batch_increment_renderpass_info(tc, tc->next, false);
   return &tc->base;

fail:
   tc_destroy(&tc->base);
   return NULL;
}

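/* Set the limit on the total size of outstanding buffer mappings that tc
 * uses to throttle mapped-memory usage; computed as total_ram / divisor
 * and capped at 512 MB on 32-bit builds to avoid address-space exhaustion.
 */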
void
threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
{
   uint64_t total_ram;
   if (os_get_total_physical_memory(&total_ram)) {
      tc->bytes_mapped_limit = total_ram / divisor;
      if (sizeof(void*) == 4)
         tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
   }
}

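/* Return the renderpass info for the current renderpass. The info may have
 * been re-recorded into a newer copy (chained via "next"), so wait on each
 * entry's fence and walk the chain until the final entry is reached.
 */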
const struct tc_renderpass_info *
threaded_context_get_renderpass_info(struct threaded_context *tc)
{
   assert(tc->renderpass_info && tc->options.parse_renderpass_info);
   struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
   while (1) {
      util_queue_fence_wait(&info->ready);
      if (!info->next)
         return &info->info;
      info = info->next;
   }
}