/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, or user buffer, or unsupported format,
 * etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed every draw command, but
 * only the subset of vertices needed for that draw command is uploaded or
 * translated. (The module never translates whole buffers.)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations, and for indexed
 * draw operations where the [min_index, max_index] range is not much bigger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, the module forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */
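
/* An illustrative sketch of the bitmask approach described above
 * (hypothetical values, not real state): if slots 0 and 2 hold user buffers
 * and the bound vertex elements read slots 0 and 1, then
 *
 *    user_vb_mask          = 0x5;   // 0b101
 *    mgr->ve->used_vb_mask = 0x3;   // 0b011
 *    user_vb_mask & mgr->ve->used_vb_mask == 0x1
 *
 * so a single AND answers "does any used buffer need an upload?" without
 * looping over all PIPE_MAX_ATTRIBS slots. u_vbuf_draw_vbo and
 * u_vbuf_need_minmax_index below use exactly this pattern. */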

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers are referenced by at least one incompatible vertex
    * element. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers are referenced by incompatible vertex elements only. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers are referenced by at least one compatible vertex
    * element. */
   uint32_t compatible_vb_mask_any;
   /* Which buffers are referenced by compatible vertex elements only. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers are referenced by at least one non-instanced vertex
    * element. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};
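
/* For illustration (a hypothetical attrib layout, not something this module
 * requires): a position stream with stride 12 and instance_divisor 0 is
 * VB_VERTEX, a per-instance transform with stride 64 and instance_divisor 1
 * is VB_INSTANCE, and an attrib fetched from a buffer bound with stride 0 is
 * VB_CONST. u_vbuf_translate_begin below sorts every vertex element into
 * exactly one of these categories. */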

struct u_vbuf {
   struct u_vbuf_caps caps;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;
   struct u_upload_mgr *uploader;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   unsigned aux_vertex_buffer_slot;
   struct pipe_vertex_buffer aux_vertex_buffer_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* The index buffer. */
   struct pipe_index_buffer index_buffer;

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* Whether the fallback vertex element state is currently bound (and
    * therefore used for rendering). */
   boolean using_translate;
   /* The vertex buffer slot indices where translated vertices have been
    * stored. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffers are user buffers. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers have a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};
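
/* How the table above is applied (whether a fallback actually triggers
 * depends on the screen's is_format_supported): e.g. on hardware without
 * half-float vertex fetch, u_vbuf_get_caps below records
 *
 *    caps->format_translation[PIPE_FORMAT_R16G16B16_FLOAT]
 *       == PIPE_FORMAT_R32G32B32_FLOAT
 *
 * and elements sourcing R16G16B16_FLOAT are later marked incompatible and
 * converted by the translate fallback. */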

boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
{
   unsigned i;
   boolean fallback = FALSE;

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format.  So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         fallback = TRUE;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned ||
       !caps->user_vertex_buffers) {
      fallback = TRUE;
   }

   return fallback;
}

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe,
              struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->uploader = u_upload_create(pipe, 1024 * 1024,
                                   PIPE_BIND_VERTEX_BUFFER,
                                   PIPE_USAGE_STREAM);

   return mgr;
}
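
/* A minimal sketch of how a driver might hook this module up, using only
 * the entry points defined here and in u_vbuf.h (the my_* names and fields
 * are hypothetical):
 *
 *    // at context creation
 *    struct u_vbuf_caps caps;
 *    if (u_vbuf_get_caps(screen, &caps)) {
 *       // some cap is missing -> route vertex state through u_vbuf
 *       ctx->vbuf = u_vbuf_create(&ctx->base, &caps, 0);
 *    }
 *
 *    // in the driver's draw_vbo hook
 *    if (ctx->vbuf)
 *       u_vbuf_draw_vbo(ctx->vbuf, info);  // falls back only when needed
 *    else
 *       my_hw_draw_vbo(ctx, info);         // hypothetical direct path
 *
 * The state setters (u_vbuf_set_vertex_buffers, u_vbuf_set_vertex_elements,
 * u_vbuf_set_index_buffer) are forwarded the same way. */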

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* Need to include the count in the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                const struct pipe_vertex_element *states)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                              PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_index_buffer(mgr->pipe, NULL);
   pipe_resource_reference(&mgr->index_buffer.buffer, NULL);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
      pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
   }
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
      pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
   }
   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);

   translate_cache_destroy(mgr->translate_cache);
   u_upload_destroy(mgr->uploader);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int start_index, unsigned num_indices, int min_index,
                         boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->user_buffer) {
         map = (uint8_t*)vb->user_buffer + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (offset + size > vb->buffer->width0) {
            size = vb->buffer->width0 - offset;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
                                     PIPE_TRANSFER_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, ~0);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_index_buffer *ib = &mgr->index_buffer;
      struct pipe_transfer *transfer = NULL;
      unsigned offset = ib->offset + start_index * ib->index_size;
      uint8_t *map;

      assert((ib->buffer || ib->user_buffer) && ib->index_size);

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->uploader, 0,
                     key->output_stride * num_indices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (ib->user_buffer) {
         map = (uint8_t*)ib->user_buffer + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
                                     num_indices * ib->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (ib->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, num_indices, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, num_indices, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->uploader,
                     key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Set up the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_resource_reference(
      &mgr->real_vertex_buffer[out_vb].buffer, NULL);
   mgr->real_vertex_buffer[out_vb].buffer = out_buffer;

   return PIPE_OK;
}

static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set a bit for each buffer which is incompatible or isn't enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Find free slots for each type if needed. */
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            return FALSE;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       int start_vertex, unsigned num_vertices,
                       int start_instance, unsigned num_instances,
                       int start_index, unsigned num_indices, int min_index,
                       boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                   mgr->ve->used_vb_mask;

   int start[VB_NUM] = {
      start_vertex,     /* VERTEX */
      start_instance,   /* INSTANCE */
      0                 /* CONST */
   };

   unsigned num[VB_NUM] = {
      num_vertices,     /* VERTEX */
      num_instances,    /* INSTANCE */
      1                 /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Determine which category this attrib will be translated as:
       * vertex, instance, or constant. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type],
                                        start_index, num_indices, min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fix up the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Set up the new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL);
         mgr->fallback_vbs[i] = ~0;

         /* This will cause the buffer to be unbound in the driver later. */
         mgr->dirty_real_vb_mask |= 1 << vb;
      }
   }
}

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   ve->driver_cso =
      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
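   /* Worked example for the mask below (hypothetical values):
    * start_slot = 2, count = 3 -> ((1ull << 3) - 1) << 2 = 0b11100,
    * so mask = ~0b11100 keeps every slot except 2..4, which are about
    * to be rewritten. */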
   uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_resource_reference(&mgr->vertex_buffer[dst_index].buffer, NULL);
         pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer,
                                 NULL);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer && !vb->user_buffer) {
         pipe_resource_reference(&orig_vb->buffer, NULL);
         pipe_resource_reference(&real_vb->buffer, NULL);
         real_vb->user_buffer = NULL;
         continue;
      }

      pipe_resource_reference(&orig_vb->buffer, vb->buffer);
      orig_vb->user_buffer = vb->user_buffer;

      real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset;
      real_vb->stride = orig_vb->stride = vb->stride;

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->user_buffer) {
         user_vb_mask |= 1 << dst_index;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      pipe_resource_reference(&real_vb->buffer, vb->buffer);
      real_vb->user_buffer = vb->user_buffer;
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}

void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
                             const struct pipe_index_buffer *ib)
{
   struct pipe_context *pipe = mgr->pipe;

   if (ib) {
      assert(ib->offset % ib->index_size == 0);
      pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
      memcpy(&mgr->index_buffer, ib, sizeof(*ib));
   } else {
      pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
   }

   pipe->set_index_buffer(pipe, ib);
}

static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

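      /* Compute the byte range [first, first + size) read by this element.
       * Worked example for the instanced case (illustrative numbers only):
       * num_instances = 10, instance_div = 4 -> count = (10 + 4 - 1) / 4 = 3
       * fetches, so size spans two full strides plus one element. */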
      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

      u_upload_data(mgr->uploader, start, end - start, 4, ptr + start,
                    &real_vb->buffer_offset, &real_vb->buffer);
      if (!real_vb->buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                                    struct pipe_index_buffer *ib,
                                    boolean primitive_restart,
                                    unsigned restart_index,
                                    unsigned start, unsigned count,
                                    int *out_min_index,
                                    int *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;
   unsigned i;

   if (ib->user_buffer) {
      indices = (uint8_t*)ib->user_buffer +
                ib->offset + start * ib->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, ib->buffer,
                                      ib->offset + start * ib->index_size,
                                      count * ib->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   switch (ib->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max_ui = 0;
      unsigned min_ui = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] != restart_index) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
            if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
         }
      }
      *out_min_index = min_ui;
      *out_max_index = max_ui;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned max_us = 0;
      unsigned min_us = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (us_indices[i] != restart_index) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *out_min_index = min_us;
      *out_max_index = max_us;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned max_ub = 0;
      unsigned min_ub = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] != restart_index) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *out_min_index = min_ub;
      *out_max_index = max_ub;
      break;
   }
   default:
      assert(0);
      *out_min_index = 0;
      *out_max_index = 0;
   }

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

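   /* Bind only the dirty range. Worked example (hypothetical mask):
    * dirty_real_vb_mask = 0b01100 -> start_slot = ffs(0b01100) - 1 = 2,
    * count = util_last_bit(0b01100 >> 2) = util_last_bit(0b011) = 2,
    * so slots 2..3 are rebound. */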
   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   pipe->set_vertex_buffers(pipe, start_slot, count,
                            mgr->real_vertex_buffer + start_slot);
   mgr->dirty_real_vb_mask = 0;
}

void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex, min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Fallback. We need to know all the parameters. */
   if (new_info.indirect) {
      struct pipe_transfer *transfer = NULL;
      int *data;

      if (new_info.indexed) {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 20,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.index_bias = data[3];
         new_info.start_instance = data[4];
      }
      else {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 16,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.start_instance = data[3];
      }

      new_info.count = data[0];
      new_info.instance_count = data[1];
      new_info.start = data[2];
      pipe_buffer_unmap(pipe, transfer);
      new_info.indirect = NULL;
   }

   if (new_info.indexed) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         int max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer,
                                    new_info.primitive_restart,
                                    new_info.restart_index, new_info.start,
                                    new_info.count, &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
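         /* E.g. the {0, 1, 10000} triangle from the comment at the top of
          * this file: count = 3, num_vertices = 10001, so both conditions
          * below hold and the indices are unrolled instead of uploading
          * a ~10001-vertex range. */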
         if (!new_info.primitive_restart &&
             num_vertices > new_info.count*2 &&
             num_vertices - new_info.count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
                                  new_info.start_instance,
                                  new_info.instance_count, new_info.start,
                                  new_info.count, min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         new_info.indexed = FALSE;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(mgr->uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}

void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   struct pipe_vertex_buffer *vb =
         &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot];

   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, vb->buffer);
   memcpy(&mgr->aux_vertex_buffer_saved, vb, sizeof(*vb));
}

void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1,
                             &mgr->aux_vertex_buffer_saved);
   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
}