/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/format/format_utils.h"
#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count,
                                  unsigned unbind_num_trailing_slots,
                                  bool take_ownership)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   *enabled_buffers &= ~u_bit_consecutive(start_slot, count);

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!take_ownership && !src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);
   }

   for (i = 0; i < unbind_num_trailing_slots; i++)
      pipe_vertex_buffer_unreference(&dst[count + i]);
}

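/* Example: a driver's set_vertex_buffers hook can delegate to this helper.
 * A minimal sketch, assuming a hypothetical "struct my_context" that stores
 * the buffer array and enabled-slot mask (not part of this file):
 *
 *    static void my_set_vertex_buffers(struct pipe_context *pipe,
 *                                      unsigned count,
 *                                      const struct pipe_vertex_buffer *bufs)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers,
 *                                    &ctx->enabled_vb_mask,
 *                                    bufs, 0, count, 0, false);
 *    }
 */
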
/**
 * Same as util_set_vertex_buffers_mask, except that it tracks the number
 * of bound buffers in *dst_count instead of taking an enabled-buffer
 * bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count,
                                   unsigned unbind_num_trailing_slots,
                                   bool take_ownership)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count, unbind_num_trailing_slots,
                                take_ownership);

   *dst_count = util_last_bit(enabled_buffers);
}

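/* Example: drivers that track a buffer count instead of a mask can use the
 * _count variant. A minimal sketch with hypothetical context fields:
 *
 *    util_set_vertex_buffers_count(ctx->vertex_buffers,
 *                                  &ctx->num_vertex_buffers,
 *                                  bufs, 0, count, 0, false);
 */
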
/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

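/* Example: a set_shader_buffers hook can use this helper to keep a per-stage
 * SSBO binding mask up to date. A minimal sketch; the "ssbo" and "ssbo_mask"
 * context fields are hypothetical:
 *
 *    util_set_shader_buffers_mask(ctx->ssbo[shader],
 *                                 &ctx->ssbo_mask[shader],
 *                                 buffers, start_slot, count);
 */
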
/**
 * Given a draw that sources indices from a user index buffer, upload the
 * range of indices used by the draw. *out_offset is adjusted so that the
 * draw's original start index remains valid with the returned buffer.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset, unsigned alignment)
{
   unsigned start_offset = draw->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 draw->count * info->index_size, alignment,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}

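/* Example: a draw_vbo implementation can upload user indices before
 * programming hardware state. A minimal sketch; error handling and the
 * actual draw are omitted:
 *
 *    struct pipe_resource *ib = NULL;
 *    unsigned ib_offset = 0;
 *
 *    if (info->has_user_indices &&
 *        !util_upload_index_buffer(pipe, info, draw, &ib, &ib_offset, 4))
 *       return;
 *
 *    (emit the draw using ib and ib_offset, then release the reference)
 *    pipe_resource_reference(&ib, NULL);
 */
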
/**
 * Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
 * 3 and 4 component formats are expanded into 2 slots.
 *
 * @param velems        Original vertex elements, will be updated to contain
 *                      the lowered vertex elements.
 * @param velem_count   Original count, will be updated to contain the count
 *                      after lowering.
 * @param tmp           Temporary array of PIPE_MAX_ATTRIBS vertex elements.
 */
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
                                  unsigned *velem_count,
                                  struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
{
   const struct pipe_vertex_element *input = *velems;
   unsigned count = *velem_count;
   bool has_64bit = false;

   for (unsigned i = 0; i < count; i++) {
      has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
                   input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
   }

   /* Return the original vertex elements if there is nothing to do. */
   if (!has_64bit)
      return;

   /* Lower 64_UINT to 32_UINT. */
   unsigned new_count = 0;

   for (unsigned i = 0; i < count; i++) {
      enum pipe_format format = input[i].src_format;

      /* If the shader input is dvec2 or smaller, reduce the number of
       * components to 2 at most. If the shader input is dvec3 or larger,
       * expand the number of components to 3 at least. If the 3rd component
       * is out of bounds, the hardware shouldn't skip loading the first
       * 2 components.
       */
      if (format >= PIPE_FORMAT_R64_UINT &&
          format <= PIPE_FORMAT_R64G64B64A64_UINT) {
         if (input[i].dual_slot)
            format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
         else
            format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
      }

      switch (format) {
      case PIPE_FORMAT_R64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64B64_UINT:
      case PIPE_FORMAT_R64G64B64A64_UINT:
         assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
         tmp[new_count] = tmp[new_count + 1] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_format =
            format == PIPE_FORMAT_R64G64B64_UINT ?
                  PIPE_FORMAT_R32G32_UINT :
                  PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_offset += 16;
         new_count += 2;
         break;

      default:
         tmp[new_count++] = input[i];
         break;
      }
   }

   *velem_count = new_count;
   *velems = tmp;
}

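/* Example: a create_vertex_elements_state hook can lower 64-bit attributes
 * before translating to hardware formats. A minimal sketch; "elements" and
 * "num_elements" are the hook's local copies of its arguments:
 *
 *    struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
 *
 *    util_lower_uint64_vertex_elements(&elements, &num_elements, tmp);
 *
 * Afterwards, "elements" and "num_elements" describe only 32-bit formats.
 */
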
/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}

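/* Example: wrap a suspect draw call with these helpers during bring-up to
 * dump pipeline statistics. A minimal sketch; the draw itself is elided:
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *
 *    (issue the draw under test)
 *
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */
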
/* This is a helper for profiling. Don't remove. */
struct pipe_query *
util_begin_time_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for profiling. Don't remove. */
void
util_end_time_query(struct pipe_context *ctx, struct pipe_query *q, FILE *f,
                    const char *name)
{
   union pipe_query_result result;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, &result);
   ctx->destroy_query(ctx, q);

   fprintf(f, "Time elapsed: %s - %"PRIu64".%u us\n", name,
           result.u64 / 1000, (unsigned)(result.u64 % 1000) / 100);
}

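/* Example: measure the GPU time taken by a block of work while profiling.
 * A minimal sketch; the label string is arbitrary:
 *
 *    struct pipe_query *q = util_begin_time_query(ctx);
 *
 *    (submit the work to be measured)
 *
 *    if (q)
 *       util_end_time_query(ctx, q, stderr, "blit");
 */
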
/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
   /* Drop our reference so the fence isn't leaked. */
   ctx->screen->fence_reference(ctx->screen, &fence, NULL);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}

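/* Example: throttle texture uploads so pending transfers can't exceed a
 * fixed budget. A minimal sketch; the 100 MB budget and the "throttle"
 * context field are hypothetical:
 *
 *    util_throttle_init(&ctx->throttle, 100 * 1024 * 1024);
 *    ...
 *    util_throttle_memory_usage(pipe, &ctx->throttle,
 *                               (uint64_t)width * height * depth * Bpp);
 *    (perform the upload that allocates that memory)
 *    ...
 *    util_throttle_deinit(pipe->screen, &ctx->throttle);
 */
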
bool
util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize,
                              uint32_t *clamped)
{
   /* Reduce a large clear value size if possible. */
   if (*clearValueSize > 4) {
      bool clear_dword_duplicated = true;
      const uint32_t *clear_value = clearValue;

      /* See if we can lower large fills to dword fills. */
      for (unsigned i = 1; i < *clearValueSize / 4; i++) {
         if (clear_value[0] != clear_value[i]) {
            clear_dword_duplicated = false;
            break;
         }
      }
      if (clear_dword_duplicated) {
         *clamped = *clear_value;
         *clearValueSize = 4;
      }
      return clear_dword_duplicated;
   }

   /* Expand a small clear value size. */
   if (*clearValueSize <= 2) {
      if (*clearValueSize == 1) {
         *clamped = *(uint8_t *)clearValue;
         *clamped |=
            (*clamped << 8) | (*clamped << 16) | (*clamped << 24);
      } else {
         *clamped = *(uint16_t *)clearValue;
         *clamped |= *clamped << 16;
      }
      *clearValueSize = 4;
      return true;
   }
   return false;
}

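/* Example: a clear_buffer path that only supports dword fills can try to
 * lower the clear value first. A minimal sketch; my_hw_dword_fill is a
 * hypothetical hardware fast path:
 *
 *    uint32_t dword;
 *
 *    if (util_lower_clearsize_to_dword(clear_value, &clear_value_size,
 *                                      &dword))
 *       my_hw_dword_fill(ctx, dst, offset, size, dword);
 *    else
 *       (fall back to a generic clear)
 */
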
void
util_init_pipe_vertex_state(struct pipe_screen *screen,
                            struct pipe_vertex_buffer *buffer,
                            const struct pipe_vertex_element *elements,
                            unsigned num_elements,
                            struct pipe_resource *indexbuf,
                            uint32_t full_velem_mask,
                            struct pipe_vertex_state *state)
{
   assert(num_elements == util_bitcount(full_velem_mask));

   pipe_reference_init(&state->reference, 1);
   state->screen = screen;

   pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
   pipe_resource_reference(&state->input.indexbuf, indexbuf);
   state->input.num_elements = num_elements;
   for (unsigned i = 0; i < num_elements; i++)
      state->input.elements[i] = elements[i];
   state->input.full_velem_mask = full_velem_mask;
}

/**
 * Clamp color value to format range.
 */
union pipe_color_union
util_clamp_color(enum pipe_format format,
                 const union pipe_color_union *color)
{
   union pipe_color_union clamp_color = *color;
   int i;

   for (i = 0; i < util_format_get_nr_components(format); i++) {
      uint8_t bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, i);

      if (util_format_is_unorm(format))
         clamp_color.ui[i] = _mesa_unorm_to_unorm(clamp_color.ui[i], bits, bits);
      else if (util_format_is_snorm(format))
         clamp_color.i[i] = _mesa_snorm_to_snorm(clamp_color.i[i], bits, bits);
      else if (util_format_is_pure_uint(format))
         clamp_color.ui[i] = _mesa_unsigned_to_unsigned(clamp_color.ui[i], bits);
      else if (util_format_is_pure_sint(format))
         clamp_color.i[i] = _mesa_signed_to_signed(clamp_color.i[i], bits);
   }

   return clamp_color;
}

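/* Example: clamp an API-provided clear color to what the render target
 * format can represent before programming clear registers. A minimal
 * sketch:
 *
 *    union pipe_color_union clamped =
 *       util_clamp_color(surf->format, color);
 */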