1 /**************************************************************************
2 *
3 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * This module uploads user buffers and translates vertex buffers that
30 * contain incompatible vertices (i.e. not supported by the driver/hardware)
31 * into compatible ones, based on the Gallium CAPs.
32 *
33 * It does not upload index buffers.
34 *
35 * The module heavily uses bitmasks to represent per-buffer and
36 * per-vertex-element flags to avoid looping over the list of buffers just
37 * to see if there's a non-zero stride, a user buffer, or an unsupported format,
38 * etc.
39 *
40 * There are 3 categories of vertex elements, which are processed separately:
41 * - per-vertex attribs (stride != 0, instance_divisor == 0)
42 * - instanced attribs (stride != 0, instance_divisor > 0)
43 * - constant attribs (stride == 0)
44 *
45 * All needed uploads and translations are performed for every draw command,
46 * but only the subset of vertices needed for that draw command is uploaded
47 * or translated. (The module never translates whole buffers.)
48 *
49 *
50 * The module consists of two main parts:
51 *
52 *
53 * 1) Translate (u_vbuf_translate_begin/end)
54 *
55 * This is pretty much a vertex fetch fallback. It translates vertices from
56 * one vertex buffer to another in an unused vertex buffer slot. It does
57 * whatever is needed to make the vertices readable by the hardware (changes
58 * vertex formats and aligns offsets and strides). The translate module is
59 * used here.
60 *
61 * Each of the 3 categories is translated to a separate buffer.
62 * Only the [min_index, max_index] range is translated. For instanced attribs,
63 * the range is [start_instance, start_instance+instance_count]. For constant
64 * attribs, the range is [0, 1].
65 *
66 *
67 * 2) User buffer uploading (u_vbuf_upload_buffers)
68 *
69 * Only the [min_index, max_index] range is uploaded (just like Translate)
70 * with a single memcpy.
71 *
72 * This method works best for non-indexed draw operations, or for indexed
73 * draw operations where the [min_index, max_index] range is not much bigger
74 * than the vertex count.
75 *
76 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
77 * the per-vertex attribs are uploaded via the translate module, all packed
78 * into one vertex buffer, and the indexed draw call is turned into
79 * a non-indexed one in the process. This adds additional complexity
80 * to the translate part, but it prevents bad apps from bringing your frame
81 * rate down.
82 *
83 *
84 * If there is nothing to do, it forwards every command to the driver.
85 * The module also has its own CSO cache of vertex element states.
86 */
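/*
 * Illustrative only: a rough sketch (not part of this file) of how a Gallium
 * driver typically wires u_vbuf into its draw path, single-draw case only.
 * The "my_context", "my_draw_vbo" and "screen->vbuf_caps" / "ctx->vbuf" names
 * are hypothetical; the u_vbuf_* calls are the entry points defined in this
 * module. Drivers route set_vertex_buffers and vertex element CSOs through
 * u_vbuf in the same fashion.
 *
 *    // at context creation, assuming u_vbuf_caps were filled at screen init
 *    if (screen->vbuf_caps.fallback_always ||
 *        screen->vbuf_caps.fallback_only_for_user_vbuffers)
 *       ctx->vbuf = u_vbuf_create(&ctx->base, &screen->vbuf_caps);
 *
 *    // in the pipe_context::draw_vbo hook
 *    static void my_draw_vbo(struct pipe_context *pipe,
 *                            const struct pipe_draw_info *info,
 *                            unsigned drawid_offset,
 *                            const struct pipe_draw_indirect_info *indirect,
 *                            const struct pipe_draw_start_count_bias *draws,
 *                            unsigned num_draws)
 *    {
 *       struct my_context *ctx = my_context(pipe);
 *
 *       if (ctx->vbuf) {
 *          u_vbuf_draw_vbo(ctx->vbuf, info, drawid_offset, indirect, draws[0]);
 *          return;
 *       }
 *       // ... otherwise emit the draw directly
 *    }
 */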
87
88 #include "util/u_vbuf.h"
89
90 #include "util/u_dump.h"
91 #include "util/format/u_format.h"
92 #include "util/u_helpers.h"
93 #include "util/u_inlines.h"
94 #include "util/u_memory.h"
95 #include "indices/u_primconvert.h"
96 #include "util/u_prim_restart.h"
97 #include "util/u_screen.h"
98 #include "util/u_upload_mgr.h"
99 #include "translate/translate.h"
100 #include "translate/translate_cache.h"
101 #include "cso_cache/cso_cache.h"
102 #include "cso_cache/cso_hash.h"
103
104 struct u_vbuf_elements {
105 unsigned count;
106 struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
107
108 unsigned src_format_size[PIPE_MAX_ATTRIBS];
109
110 /* If (velem[i].src_format != native_format[i]), the vertex buffer
111 * referenced by the vertex element cannot be used for rendering and
112 * its vertex data must be translated to native_format[i]. */
113 enum pipe_format native_format[PIPE_MAX_ATTRIBS];
114 unsigned native_format_size[PIPE_MAX_ATTRIBS];
115
116 /* Which buffers are used by the vertex element state. */
117 uint32_t used_vb_mask;
118 /* An element is flagged as incompatible for either of two reasons:
119 * - src_format != native_format, as discussed above.
120 * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
121 uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
122 /* Which buffers have at least one incompatible vertex element
123 * referencing them. */
124 uint32_t incompatible_vb_mask_any;
125 /* Which buffers have only incompatible vertex elements referencing them. */
126 uint32_t incompatible_vb_mask_all;
127 /* Which buffers have at least one compatible vertex element
128 * referencing them. */
129 uint32_t compatible_vb_mask_any;
130 /* Which buffers have only compatible vertex elements referencing them. */
131 uint32_t compatible_vb_mask_all;
132
133 /* Which buffers have at least one non-instanced vertex element
134 * referencing them. */
135 uint32_t noninstance_vb_mask_any;
136
137 /* Which buffers are used by multiple vertex attribs. */
138 uint32_t interleaved_vb_mask;
139
140 void *driver_cso;
141 };
142
143 enum {
144 VB_VERTEX = 0,
145 VB_INSTANCE = 1,
146 VB_CONST = 2,
147 VB_NUM = 3
148 };
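/*
 * Example classification (illustrative values): an element whose vertex
 * buffer has stride == 16 and whose instance_divisor == 0 falls into
 * VB_VERTEX, the same element with instance_divisor == 4 falls into
 * VB_INSTANCE, and an element fetching from a buffer with stride == 0
 * falls into VB_CONST.
 */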
149
150 struct u_vbuf {
151 struct u_vbuf_caps caps;
152 bool has_signed_vb_offset;
153
154 struct pipe_context *pipe;
155 struct translate_cache *translate_cache;
156 struct cso_cache cso_cache;
157
158 struct primconvert_context *pc;
159 bool flatshade_first;
160
161 /* This is what was set in set_vertex_buffers.
162 * May contain user buffers. */
163 struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
164 uint32_t enabled_vb_mask;
165
166 /* Vertex buffers for the driver.
167 * These usually contain no user buffers. */
168 struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
169 uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
170 call of set_vertex_buffers */
171
172 /* Vertex elements. */
173 struct u_vbuf_elements *ve, *ve_saved;
174
175 /* Vertex elements used for the translate fallback. */
176 struct cso_velems_state fallback_velems;
177 /* If true, the fallback vertex element state is currently bound and
178 * therefore used for rendering. */
179 boolean using_translate;
180 /* The vertex buffer slot indices where translated vertices are stored,
181 * one per category. */
182 unsigned fallback_vbs[VB_NUM];
183 unsigned fallback_vbs_mask;
184
185 /* Which buffers are user buffers. */
186 uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
187 /* Which buffers are incompatible (e.g. unaligned offset or stride). */
188 uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
189 /* Which buffers have a non-zero stride. */
190 uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
191 /* Which buffers are allowed (supported by hardware). */
192 uint32_t allowed_vb_mask;
193 };
194
195 static void *
196 u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
197 const struct pipe_vertex_element *attribs);
198 static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
199 enum cso_cache_type type);
200
201 static const struct {
202 enum pipe_format from, to;
203 } vbuf_format_fallbacks[] = {
204 { PIPE_FORMAT_R32_FIXED, PIPE_FORMAT_R32_FLOAT },
205 { PIPE_FORMAT_R32G32_FIXED, PIPE_FORMAT_R32G32_FLOAT },
206 { PIPE_FORMAT_R32G32B32_FIXED, PIPE_FORMAT_R32G32B32_FLOAT },
207 { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_R32G32B32A32_FLOAT },
208 { PIPE_FORMAT_R16_FLOAT, PIPE_FORMAT_R32_FLOAT },
209 { PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
210 { PIPE_FORMAT_R16G16B16_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT },
211 { PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT },
212 { PIPE_FORMAT_R64_FLOAT, PIPE_FORMAT_R32_FLOAT },
213 { PIPE_FORMAT_R64G64_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
214 { PIPE_FORMAT_R64G64B64_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT },
215 { PIPE_FORMAT_R64G64B64A64_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT },
216 { PIPE_FORMAT_R32_UNORM, PIPE_FORMAT_R32_FLOAT },
217 { PIPE_FORMAT_R32G32_UNORM, PIPE_FORMAT_R32G32_FLOAT },
218 { PIPE_FORMAT_R32G32B32_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
219 { PIPE_FORMAT_R32G32B32A32_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
220 { PIPE_FORMAT_R32_SNORM, PIPE_FORMAT_R32_FLOAT },
221 { PIPE_FORMAT_R32G32_SNORM, PIPE_FORMAT_R32G32_FLOAT },
222 { PIPE_FORMAT_R32G32B32_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
223 { PIPE_FORMAT_R32G32B32A32_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
224 { PIPE_FORMAT_R32_USCALED, PIPE_FORMAT_R32_FLOAT },
225 { PIPE_FORMAT_R32G32_USCALED, PIPE_FORMAT_R32G32_FLOAT },
226 { PIPE_FORMAT_R32G32B32_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
227 { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
228 { PIPE_FORMAT_R32_SSCALED, PIPE_FORMAT_R32_FLOAT },
229 { PIPE_FORMAT_R32G32_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
230 { PIPE_FORMAT_R32G32B32_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
231 { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
232 { PIPE_FORMAT_R16_UNORM, PIPE_FORMAT_R32_FLOAT },
233 { PIPE_FORMAT_R16G16_UNORM, PIPE_FORMAT_R32G32_FLOAT },
234 { PIPE_FORMAT_R16G16B16_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
235 { PIPE_FORMAT_R16G16B16A16_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
236 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R32_FLOAT },
237 { PIPE_FORMAT_R16G16_SNORM, PIPE_FORMAT_R32G32_FLOAT },
238 { PIPE_FORMAT_R16G16B16_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
239 { PIPE_FORMAT_R16G16B16_SINT, PIPE_FORMAT_R32G32B32_SINT },
240 { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R32G32B32_UINT },
241 { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
242 { PIPE_FORMAT_R16_USCALED, PIPE_FORMAT_R32_FLOAT },
243 { PIPE_FORMAT_R16G16_USCALED, PIPE_FORMAT_R32G32_FLOAT },
244 { PIPE_FORMAT_R16G16B16_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
245 { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
246 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R32_FLOAT },
247 { PIPE_FORMAT_R16G16_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
248 { PIPE_FORMAT_R16G16B16_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
249 { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
250 { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R32_FLOAT },
251 { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R32G32_FLOAT },
252 { PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_R32G32B32_FLOAT },
253 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
254 { PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R32_FLOAT },
255 { PIPE_FORMAT_R8G8_SNORM, PIPE_FORMAT_R32G32_FLOAT },
256 { PIPE_FORMAT_R8G8B8_SNORM, PIPE_FORMAT_R32G32B32_FLOAT },
257 { PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT },
258 { PIPE_FORMAT_R8_USCALED, PIPE_FORMAT_R32_FLOAT },
259 { PIPE_FORMAT_R8G8_USCALED, PIPE_FORMAT_R32G32_FLOAT },
260 { PIPE_FORMAT_R8G8B8_USCALED, PIPE_FORMAT_R32G32B32_FLOAT },
261 { PIPE_FORMAT_R8G8B8A8_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
262 { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R32_FLOAT },
263 { PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R32G32_FLOAT },
264 { PIPE_FORMAT_R8G8B8_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT },
265 { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
266 };
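/*
 * For example (illustrative): if the screen reports PIPE_FORMAT_R16G16B16_FLOAT
 * as unsupported for vertex buffers, u_vbuf_get_caps() below records the
 * PIPE_FORMAT_R32G32B32_FLOAT replacement from this table in
 * caps->format_translation[] and sets caps->fallback_always, so the driver
 * routes all draws through u_vbuf.
 */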
267
268 void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
269 bool needs64b)
270 {
271 unsigned i;
272
273 memset(caps, 0, sizeof(*caps));
274
275 /* I'd rather have a bitfield of which formats are supported and a static
276 * table of the translations indexed by format, but since we don't have C99
277 * we can't easily make a sparsely-populated table indexed by format at
278 * compile time. So, we construct the sparse table here at runtime.
279 */
280 for (i = 0; i < PIPE_FORMAT_COUNT; i++)
281 caps->format_translation[i] = i;
282
283 for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
284 enum pipe_format format = vbuf_format_fallbacks[i].from;
285 unsigned comp_bits = util_format_get_component_bits(format, 0, 0);
286
287 if ((comp_bits > 32) && !needs64b)
288 continue;
289
290 if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
291 PIPE_BIND_VERTEX_BUFFER)) {
292 caps->format_translation[format] = vbuf_format_fallbacks[i].to;
293 caps->fallback_always = true;
294 }
295 }
296
297 caps->buffer_offset_unaligned =
298 !screen->get_param(screen,
299 PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
300 caps->buffer_stride_unaligned =
301 !screen->get_param(screen,
302 PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
303 caps->velem_src_offset_unaligned =
304 !screen->get_param(screen,
305 PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
306 caps->user_vertex_buffers =
307 screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
308 caps->max_vertex_buffers =
309 screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
310
311 if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART) ||
312 screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX)) {
313 caps->rewrite_restart_index = screen->get_param(screen, PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART);
314 caps->supported_restart_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART);
315 caps->supported_restart_modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES);
316 if (caps->supported_restart_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
317 caps->fallback_always = true;
318 caps->fallback_always |= caps->rewrite_restart_index;
319 }
320 caps->supported_prim_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES);
321 if (caps->supported_prim_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
322 caps->fallback_always = true;
323
324 if (!screen->is_format_supported(screen, PIPE_FORMAT_R8_UINT, PIPE_BUFFER, 0, 0, PIPE_BIND_INDEX_BUFFER))
325 caps->fallback_always = caps->rewrite_ubyte_ibs = true;
326
327 /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
328 if (caps->max_vertex_buffers < 16)
329 caps->fallback_always = true;
330
331 if (!caps->buffer_offset_unaligned ||
332 !caps->buffer_stride_unaligned ||
333 !caps->velem_src_offset_unaligned)
334 caps->fallback_always = true;
335
336 if (!caps->fallback_always && !caps->user_vertex_buffers)
337 caps->fallback_only_for_user_vbuffers = true;
338 }
339
340 struct u_vbuf *
341 u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
342 {
343 struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
344
345 mgr->caps = *caps;
346 mgr->pipe = pipe;
347 if (caps->rewrite_ubyte_ibs || caps->rewrite_restart_index ||
348 /* require all but patches */
349 ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(PIPE_PRIM_MAX))) !=
350 BITFIELD_MASK(PIPE_PRIM_MAX)) {
351 struct primconvert_config cfg;
352 cfg.fixed_prim_restart = caps->rewrite_restart_index;
353 cfg.primtypes_mask = caps->supported_prim_modes;
354 cfg.restart_primtypes_mask = caps->supported_restart_modes;
355 mgr->pc = util_primconvert_create_config(pipe, &cfg);
356 }
357 mgr->translate_cache = translate_cache_create();
358 memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
359 mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);
360
361 mgr->has_signed_vb_offset =
362 pipe->screen->get_param(pipe->screen,
363 PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);
364
365 cso_cache_init(&mgr->cso_cache, pipe);
366 cso_cache_set_delete_cso_callback(&mgr->cso_cache,
367 u_vbuf_delete_vertex_elements, pipe);
368
369 return mgr;
370 }
371
372 /* u_vbuf uses its own caching for vertex elements, because it needs to keep
373 * its own preprocessed state per vertex element CSO. */
374 static struct u_vbuf_elements *
375 u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
376 const struct cso_velems_state *velems)
377 {
378 struct pipe_context *pipe = mgr->pipe;
379 unsigned key_size, hash_key;
380 struct cso_hash_iter iter;
381 struct u_vbuf_elements *ve;
382
383 /* need to include the count into the stored state data too. */
384 key_size = sizeof(struct pipe_vertex_element) * velems->count +
385 sizeof(unsigned);
386 hash_key = cso_construct_key((void*)velems, key_size);
387 iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS,
388 (void*)velems, key_size);
389
390 if (cso_hash_iter_is_null(iter)) {
391 struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
392 memcpy(&cso->state, velems, key_size);
393 cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
394 velems->velems);
395
396 iter = cso_insert_state(&mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
397 ve = cso->data;
398 } else {
399 ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
400 }
401
402 assert(ve);
403
404 if (ve != mgr->ve)
405 pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
406
407 return ve;
408 }
409
410 void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
411 const struct cso_velems_state *velems)
412 {
413 mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
414 }
415
416 void u_vbuf_set_flatshade_first(struct u_vbuf *mgr, bool flatshade_first)
417 {
418 mgr->flatshade_first = flatshade_first;
419 }
420
421 void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
422 {
423 mgr->ve = NULL;
424 }
425
426 void u_vbuf_destroy(struct u_vbuf *mgr)
427 {
428 struct pipe_screen *screen = mgr->pipe->screen;
429 unsigned i;
430 const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
431 PIPE_SHADER_CAP_MAX_INPUTS);
432
433 mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL);
434
435 for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
436 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
437 for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
438 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);
439
440 if (mgr->pc)
441 util_primconvert_destroy(mgr->pc);
442
443 translate_cache_destroy(mgr->translate_cache);
444 cso_cache_delete(&mgr->cso_cache);
445 FREE(mgr);
446 }
447
448 static enum pipe_error
449 u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
450 const struct pipe_draw_info *info,
451 const struct pipe_draw_start_count_bias *draw,
452 unsigned vb_mask, unsigned out_vb,
453 int start_vertex, unsigned num_vertices,
454 int min_index, boolean unroll_indices)
455 {
456 struct translate *tr;
457 struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
458 struct pipe_resource *out_buffer = NULL;
459 uint8_t *out_map;
460 unsigned out_offset, mask;
461
462 /* Get a translate object. */
463 tr = translate_cache_find(mgr->translate_cache, key);
464
465 /* Map buffers we want to translate. */
466 mask = vb_mask;
467 while (mask) {
468 struct pipe_vertex_buffer *vb;
469 unsigned offset;
470 uint8_t *map;
471 unsigned i = u_bit_scan(&mask);
472
473 vb = &mgr->vertex_buffer[i];
474 offset = vb->buffer_offset + vb->stride * start_vertex;
475
476 if (vb->is_user_buffer) {
477 map = (uint8_t*)vb->buffer.user + offset;
478 } else {
479 unsigned size = vb->stride ? num_vertices * vb->stride
480 : sizeof(double)*4;
481
482 if (!vb->buffer.resource) {
483 static uint64_t dummy_buf[4] = { 0 };
484 tr->set_buffer(tr, i, dummy_buf, 0, 0);
485 continue;
486 }
487
488 if (vb->stride) {
489 /* The stride cannot be used to calculate the map size of the buffer,
490 * because it only determines the number of bytes between elements, not
491 * the size of the elements themselves. If stride < element_size, the
492 * mapped size would be too small and the conversion would overrun the map.
493 *
494 * Instead, add the size of the largest possible attribute to ensure
495 * the map is large enough. */
496 unsigned last_offset = offset + size - vb->stride;
497 size = MAX2(size, last_offset + sizeof(double)*4);
498 }
499
500 if (offset + size > vb->buffer.resource->width0) {
501 /* Don't try to map past end of buffer. This often happens when
502 * we're translating an attribute that's at offset > 0 from the
503 * start of the vertex. If we'd subtract attrib's offset from
504 * the size, this probably wouldn't happen.
505 */
506 size = vb->buffer.resource->width0 - offset;
507
508 /* Also adjust num_vertices. A common user error is to call
509 * glDrawRangeElements() with an incorrect 'end' argument. The 'end'
510 * value should be the max index value, but people often
511 * accidentally add one to this value. This adjustment avoids
512 * crashing (by reading past the end of a hardware buffer mapping)
513 * when people do that.
514 */
515 num_vertices = (size + vb->stride - 1) / vb->stride;
516 }
517
518 map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
519 PIPE_MAP_READ, &vb_transfer[i]);
520 }
521
522 /* Subtract min_index so that indexing with the index buffer works. */
523 if (unroll_indices) {
524 map -= (ptrdiff_t)vb->stride * min_index;
525 }
526
527 tr->set_buffer(tr, i, map, vb->stride, info->max_index);
528 }
529
530 /* Translate. */
531 if (unroll_indices) {
532 struct pipe_transfer *transfer = NULL;
533 const unsigned offset = draw->start * info->index_size;
534 uint8_t *map;
535
536 /* Create and map the output buffer. */
537 u_upload_alloc(mgr->pipe->stream_uploader, 0,
538 key->output_stride * draw->count, 4,
539 &out_offset, &out_buffer,
540 (void**)&out_map);
541 if (!out_buffer)
542 return PIPE_ERROR_OUT_OF_MEMORY;
543
544 if (info->has_user_indices) {
545 map = (uint8_t*)info->index.user + offset;
546 } else {
547 map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
548 draw->count * info->index_size,
549 PIPE_MAP_READ, &transfer);
550 }
551
552 switch (info->index_size) {
553 case 4:
554 tr->run_elts(tr, (unsigned*)map, draw->count, 0, 0, out_map);
555 break;
556 case 2:
557 tr->run_elts16(tr, (uint16_t*)map, draw->count, 0, 0, out_map);
558 break;
559 case 1:
560 tr->run_elts8(tr, map, draw->count, 0, 0, out_map);
561 break;
562 }
563
564 if (transfer) {
565 pipe_buffer_unmap(mgr->pipe, transfer);
566 }
567 } else {
568 /* Create and map the output buffer. */
569 u_upload_alloc(mgr->pipe->stream_uploader,
570 mgr->has_signed_vb_offset ?
571 0 : key->output_stride * start_vertex,
572 key->output_stride * num_vertices, 4,
573 &out_offset, &out_buffer,
574 (void**)&out_map);
575 if (!out_buffer)
576 return PIPE_ERROR_OUT_OF_MEMORY;
577
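/* The translated vertices were written starting at the beginning of the
 * allocation, but the draw still addresses them from start_vertex, so bias
 * the buffer offset back by start_vertex worth of output stride (this may
 * make the offset negative, hence the has_signed_vb_offset check above). */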
578 out_offset -= key->output_stride * start_vertex;
579
580 tr->run(tr, 0, num_vertices, 0, 0, out_map);
581 }
582
583 /* Unmap all buffers. */
584 mask = vb_mask;
585 while (mask) {
586 unsigned i = u_bit_scan(&mask);
587
588 if (vb_transfer[i]) {
589 pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
590 }
591 }
592
593 /* Setup the new vertex buffer. */
594 mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
595 mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
596
597 /* Move the buffer reference. */
598 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
599 mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
600 mgr->real_vertex_buffer[out_vb].is_user_buffer = false;
601
602 return PIPE_OK;
603 }
604
605 static boolean
606 u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
607 unsigned mask[VB_NUM])
608 {
609 unsigned type;
610 unsigned fallback_vbs[VB_NUM];
611 /* Set a bit for each buffer that is incompatible or not enabled. */
612 uint32_t unused_vb_mask =
613 mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
614 ~mgr->enabled_vb_mask;
615 uint32_t unused_vb_mask_orig;
616 boolean insufficient_buffers = false;
617
618 /* No vertex buffers available at all */
619 if (!unused_vb_mask)
620 return FALSE;
621
622 memset(fallback_vbs, ~0, sizeof(fallback_vbs));
623 mgr->fallback_vbs_mask = 0;
624
625 /* Find free slots for each type if needed. */
626 unused_vb_mask_orig = unused_vb_mask;
627 for (type = 0; type < VB_NUM; type++) {
628 if (mask[type]) {
629 uint32_t index;
630
631 if (!unused_vb_mask) {
632 insufficient_buffers = true;
633 break;
634 }
635
636 index = ffs(unused_vb_mask) - 1;
637 fallback_vbs[type] = index;
638 mgr->fallback_vbs_mask |= 1 << index;
639 unused_vb_mask &= ~(1 << index);
640 /*printf("found slot=%i for type=%i\n", index, type);*/
641 }
642 }
643
644 if (insufficient_buffers) {
645 /* Not enough free vertex buffer slots for all attrib types, so they
646 * will have to share one buffer. */
647 uint32_t index = ffs(unused_vb_mask_orig) - 1;
648 /* When sharing one vertex buffer use per-vertex frequency for everything. */
649 fallback_vbs[VB_VERTEX] = index;
650 mgr->fallback_vbs_mask = 1 << index;
651 mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
652 mask[VB_CONST] = 0;
653 mask[VB_INSTANCE] = 0;
654 }
655
656 for (type = 0; type < VB_NUM; type++) {
657 if (mask[type]) {
658 mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
659 }
660 }
661
662 memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
663 return TRUE;
664 }
665
666 static boolean
667 u_vbuf_translate_begin(struct u_vbuf *mgr,
668 const struct pipe_draw_info *info,
669 const struct pipe_draw_start_count_bias *draw,
670 int start_vertex, unsigned num_vertices,
671 int min_index, boolean unroll_indices)
672 {
673 unsigned mask[VB_NUM] = {0};
674 struct translate_key key[VB_NUM];
675 unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
676 unsigned i, type;
677 const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
678 mgr->ve->used_vb_mask;
679
680 const int start[VB_NUM] = {
681 start_vertex, /* VERTEX */
682 info->start_instance, /* INSTANCE */
683 0 /* CONST */
684 };
685
686 const unsigned num[VB_NUM] = {
687 num_vertices, /* VERTEX */
688 info->instance_count, /* INSTANCE */
689 1 /* CONST */
690 };
691
692 memset(key, 0, sizeof(key));
693 memset(elem_index, ~0, sizeof(elem_index));
694
695 /* See if there are vertex attribs of each type to translate and
696 * which ones. */
697 for (i = 0; i < mgr->ve->count; i++) {
698 unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
699
700 if (!mgr->vertex_buffer[vb_index].stride) {
701 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
702 !(incompatible_vb_mask & (1 << vb_index))) {
703 continue;
704 }
705 mask[VB_CONST] |= 1 << vb_index;
706 } else if (mgr->ve->ve[i].instance_divisor) {
707 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
708 !(incompatible_vb_mask & (1 << vb_index))) {
709 continue;
710 }
711 mask[VB_INSTANCE] |= 1 << vb_index;
712 } else {
713 if (!unroll_indices &&
714 !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
715 !(incompatible_vb_mask & (1 << vb_index))) {
716 continue;
717 }
718 mask[VB_VERTEX] |= 1 << vb_index;
719 }
720 }
721
722 assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
723
724 /* Find free vertex buffer slots. */
725 if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
726 return FALSE;
727 }
728
729 /* Initialize the translate keys. */
730 for (i = 0; i < mgr->ve->count; i++) {
731 struct translate_key *k;
732 struct translate_element *te;
733 enum pipe_format output_format = mgr->ve->native_format[i];
734 unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
735 bit = 1 << vb_index;
736
737 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
738 !(incompatible_vb_mask & (1 << vb_index)) &&
739 (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
740 continue;
741 }
742
743 /* Set type to what we will translate.
744 * Whether vertex, instance, or constant attribs. */
745 for (type = 0; type < VB_NUM; type++) {
746 if (mask[type] & bit) {
747 break;
748 }
749 }
750 assert(type < VB_NUM);
751 if (mgr->ve->ve[i].src_format != output_format)
752 assert(translate_is_output_format_supported(output_format));
753 /*printf("velem=%i type=%i\n", i, type);*/
754
755 /* Add the vertex element. */
756 k = &key[type];
757 elem_index[type][i] = k->nr_elements;
758
759 te = &k->element[k->nr_elements];
760 te->type = TRANSLATE_ELEMENT_NORMAL;
761 te->instance_divisor = 0;
762 te->input_buffer = vb_index;
763 te->input_format = mgr->ve->ve[i].src_format;
764 te->input_offset = mgr->ve->ve[i].src_offset;
765 te->output_format = output_format;
766 te->output_offset = k->output_stride;
767
768 k->output_stride += mgr->ve->native_format_size[i];
769 k->nr_elements++;
770 }
771
772 /* Translate buffers. */
773 for (type = 0; type < VB_NUM; type++) {
774 if (key[type].nr_elements) {
775 enum pipe_error err;
776 err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
777 mask[type], mgr->fallback_vbs[type],
778 start[type], num[type], min_index,
779 unroll_indices && type == VB_VERTEX);
780 if (err != PIPE_OK)
781 return FALSE;
782
783 /* Fixup the stride for constant attribs. */
784 if (type == VB_CONST) {
785 mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
786 }
787 }
788 }
789
790 /* Setup new vertex elements. */
791 for (i = 0; i < mgr->ve->count; i++) {
792 for (type = 0; type < VB_NUM; type++) {
793 if (elem_index[type][i] < key[type].nr_elements) {
794 struct translate_element *te = &key[type].element[elem_index[type][i]];
795 mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
796 mgr->fallback_velems.velems[i].src_format = te->output_format;
797 mgr->fallback_velems.velems[i].src_offset = te->output_offset;
798 mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];
799
800 /* elem_index[type][i] can only be set for one type. */
801 assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
802 assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u);
803 break;
804 }
805 }
806 /* No translating, just copy the original vertex element over. */
807 if (type == VB_NUM) {
808 memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
809 sizeof(struct pipe_vertex_element));
810 }
811 }
812
813 mgr->fallback_velems.count = mgr->ve->count;
814
815 u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
816 mgr->using_translate = TRUE;
817 return TRUE;
818 }
819
820 static void u_vbuf_translate_end(struct u_vbuf *mgr)
821 {
822 unsigned i;
823
824 /* Restore vertex elements. */
825 mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
826 mgr->using_translate = FALSE;
827
828 /* Unreference the now-unused VBOs. */
829 for (i = 0; i < VB_NUM; i++) {
830 unsigned vb = mgr->fallback_vbs[i];
831 if (vb != ~0u) {
832 pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
833 mgr->fallback_vbs[i] = ~0;
834 }
835 }
836 /* This will cause the buffer to be unbound in the driver later. */
837 mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
838 mgr->fallback_vbs_mask = 0;
839 }
840
841 static void *
842 u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
843 const struct pipe_vertex_element *attribs)
844 {
845 struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
846 util_lower_uint64_vertex_elements(&attribs, &count, tmp);
847
848 struct pipe_context *pipe = mgr->pipe;
849 unsigned i;
850 struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
851 struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
852 uint32_t used_buffers = 0;
853
854 ve->count = count;
855
856 memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
857 memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
858
859 /* Set the best native format in case the original format is not
860 * supported. */
861 for (i = 0; i < count; i++) {
862 enum pipe_format format = ve->ve[i].src_format;
863 unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index;
864
865 ve->src_format_size[i] = util_format_get_blocksize(format);
866
867 if (used_buffers & vb_index_bit)
868 ve->interleaved_vb_mask |= vb_index_bit;
869
870 used_buffers |= vb_index_bit;
871
872 if (!ve->ve[i].instance_divisor) {
873 ve->noninstance_vb_mask_any |= vb_index_bit;
874 }
875
876 format = mgr->caps.format_translation[format];
877
878 driver_attribs[i].src_format = format;
879 ve->native_format[i] = format;
880 ve->native_format_size[i] =
881 util_format_get_blocksize(ve->native_format[i]);
882
883 if (ve->ve[i].src_format != format ||
884 (!mgr->caps.velem_src_offset_unaligned &&
885 ve->ve[i].src_offset % 4 != 0)) {
886 ve->incompatible_elem_mask |= 1 << i;
887 ve->incompatible_vb_mask_any |= vb_index_bit;
888 } else {
889 ve->compatible_vb_mask_any |= vb_index_bit;
890 }
891 }
892
893 if (used_buffers & ~mgr->allowed_vb_mask) {
894 /* More vertex buffers are used than the hardware supports. In
895 * principle, we only need to make sure that fewer vertex buffers are
896 * used, and mark some of the latter vertex buffers as incompatible.
897 * For now, mark all vertex buffers as incompatible.
898 */
899 ve->incompatible_vb_mask_any = used_buffers;
900 ve->compatible_vb_mask_any = 0;
901 ve->incompatible_elem_mask = u_bit_consecutive(0, count);
902 }
903
904 ve->used_vb_mask = used_buffers;
905 ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
906 ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
907
908 /* Align the formats and offsets to the size of DWORD if needed. */
909 if (!mgr->caps.velem_src_offset_unaligned) {
910 for (i = 0; i < count; i++) {
911 ve->native_format_size[i] = align(ve->native_format_size[i], 4);
912 driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
913 }
914 }
915
916 /* Only create driver CSO if no incompatible elements */
917 if (!ve->incompatible_elem_mask) {
918 ve->driver_cso =
919 pipe->create_vertex_elements_state(pipe, count, driver_attribs);
920 }
921
922 return ve;
923 }
924
925 static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
926 enum cso_cache_type type)
927 {
928 struct pipe_context *pipe = (struct pipe_context*)ctx;
929 struct cso_velements *cso = (struct cso_velements*)state;
930 struct u_vbuf_elements *ve = (struct u_vbuf_elements*)cso->data;
931
932 if (ve->driver_cso)
933 pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
934 FREE(ve);
935 FREE(cso);
936 }
937
938 void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
939 unsigned start_slot, unsigned count,
940 unsigned unbind_num_trailing_slots,
941 bool take_ownership,
942 const struct pipe_vertex_buffer *bufs)
943 {
944 unsigned i;
945 /* which buffers are enabled */
946 uint32_t enabled_vb_mask = 0;
947 /* which buffers are in user memory */
948 uint32_t user_vb_mask = 0;
949 /* which buffers are incompatible with the driver */
950 uint32_t incompatible_vb_mask = 0;
951 /* which buffers have a non-zero stride */
952 uint32_t nonzero_stride_vb_mask = 0;
953 const uint32_t mask =
954 ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
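/* e.g. start_slot = 2, count = 3, unbind_num_trailing_slots = 1:
 * mask = ~(0b1111 << 2), i.e. bits 2..5 are the ones being rewritten. */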
955
956 /* Zero out the bits we are going to rewrite completely. */
957 mgr->user_vb_mask &= mask;
958 mgr->incompatible_vb_mask &= mask;
959 mgr->nonzero_stride_vb_mask &= mask;
960 mgr->enabled_vb_mask &= mask;
961
962 if (!bufs) {
963 struct pipe_context *pipe = mgr->pipe;
964 /* Unbind. */
965 unsigned total_count = count + unbind_num_trailing_slots;
966 mgr->dirty_real_vb_mask &= mask;
967
968 for (i = 0; i < total_count; i++) {
969 unsigned dst_index = start_slot + i;
970
971 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
972 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
973 }
974
975 pipe->set_vertex_buffers(pipe, start_slot, count,
976 unbind_num_trailing_slots, false, NULL);
977 return;
978 }
979
980 for (i = 0; i < count; i++) {
981 unsigned dst_index = start_slot + i;
982 const struct pipe_vertex_buffer *vb = &bufs[i];
983 struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
984 struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];
985
986 if (!vb->buffer.resource) {
987 pipe_vertex_buffer_unreference(orig_vb);
988 pipe_vertex_buffer_unreference(real_vb);
989 continue;
990 }
991
992 if (take_ownership) {
993 pipe_vertex_buffer_unreference(orig_vb);
994 memcpy(orig_vb, vb, sizeof(*vb));
995 } else {
996 pipe_vertex_buffer_reference(orig_vb, vb);
997 }
998
999 if (vb->stride) {
1000 nonzero_stride_vb_mask |= 1 << dst_index;
1001 }
1002 enabled_vb_mask |= 1 << dst_index;
1003
1004 if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
1005 (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
1006 incompatible_vb_mask |= 1 << dst_index;
1007 real_vb->buffer_offset = vb->buffer_offset;
1008 real_vb->stride = vb->stride;
1009 pipe_vertex_buffer_unreference(real_vb);
1010 real_vb->is_user_buffer = false;
1011 continue;
1012 }
1013
1014 if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
1015 user_vb_mask |= 1 << dst_index;
1016 real_vb->buffer_offset = vb->buffer_offset;
1017 real_vb->stride = vb->stride;
1018 pipe_vertex_buffer_unreference(real_vb);
1019 real_vb->is_user_buffer = false;
1020 continue;
1021 }
1022
1023 pipe_vertex_buffer_reference(real_vb, vb);
1024 }
1025
1026 for (i = 0; i < unbind_num_trailing_slots; i++) {
1027 unsigned dst_index = start_slot + count + i;
1028
1029 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
1030 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
1031 }
1032
1033 mgr->user_vb_mask |= user_vb_mask;
1034 mgr->incompatible_vb_mask |= incompatible_vb_mask;
1035 mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
1036 mgr->enabled_vb_mask |= enabled_vb_mask;
1037
1038 /* All changed buffers are marked as dirty, even the NULL ones,
1039 * which will cause the NULL buffers to be unbound in the driver later. */
1040 mgr->dirty_real_vb_mask |= ~mask;
1041 }
1042
1043 static ALWAYS_INLINE bool
1044 get_upload_offset_size(struct u_vbuf *mgr,
1045 const struct pipe_vertex_buffer *vb,
1046 struct u_vbuf_elements *ve,
1047 const struct pipe_vertex_element *velem,
1048 unsigned vb_index, unsigned velem_index,
1049 int start_vertex, unsigned num_vertices,
1050 int start_instance, unsigned num_instances,
1051 unsigned *offset, unsigned *size)
1052 {
1053 /* Skip the buffers generated by translate. */
1054 if ((1 << vb_index) & mgr->fallback_vbs_mask || !vb->is_user_buffer)
1055 return false;
1056
1057 unsigned instance_div = velem->instance_divisor;
1058 *offset = vb->buffer_offset + velem->src_offset;
1059
1060 if (!vb->stride) {
1061 /* Constant attrib. */
1062 *size = ve->src_format_size[velem_index];
1063 } else if (instance_div) {
1064 /* Per-instance attrib. */
1065
1066 /* Figure out how many instances we'll render given instance_div. We
1067 * can't use the typical div_round_up() pattern because the CTS uses
1068 * instance_div = ~0 for a test, which overflows div_round_up()'s
1069 * addition.
1070 */
1071 unsigned count = num_instances / instance_div;
1072 if (count * instance_div != num_instances)
1073 count++;
1074
1075 *offset += vb->stride * start_instance;
1076 *size = vb->stride * (count - 1) + ve->src_format_size[velem_index];
1077 } else {
1078 /* Per-vertex attrib. */
1079 *offset += vb->stride * start_vertex;
1080 *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index];
1081 }
1082 return true;
1083 }
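/*
 * Worked example (assumed values): a per-vertex R32G32B32_FLOAT attrib
 * (src_format_size = 12) in a buffer with stride = 16, buffer_offset = 0,
 * src_offset = 4, start_vertex = 10 and num_vertices = 5 yields
 * offset = 4 + 16*10 = 164 and size = 16*(5-1) + 12 = 76, i.e. exactly
 * through the last byte of the last vertex that is actually referenced.
 */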
1084
1085
1086 static enum pipe_error
1087 u_vbuf_upload_buffers(struct u_vbuf *mgr,
1088 int start_vertex, unsigned num_vertices,
1089 int start_instance, unsigned num_instances)
1090 {
1091 unsigned i;
1092 struct u_vbuf_elements *ve = mgr->ve;
1093 unsigned nr_velems = ve->count;
1094 const struct pipe_vertex_element *velems =
1095 mgr->using_translate ? mgr->fallback_velems.velems : ve->ve;
1096
1097 /* Faster path when no vertex attribs are interleaved. */
1098 if ((ve->interleaved_vb_mask & mgr->user_vb_mask) == 0) {
1099 for (i = 0; i < nr_velems; i++) {
1100 const struct pipe_vertex_element *velem = &velems[i];
1101 unsigned index = velem->vertex_buffer_index;
1102 struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
1103 unsigned offset, size;
1104
1105 if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
1106 num_vertices, start_instance, num_instances,
1107 &offset, &size))
1108 continue;
1109
1110 struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[index];
1111 const uint8_t *ptr = mgr->vertex_buffer[index].buffer.user;
1112
1113 u_upload_data(mgr->pipe->stream_uploader,
1114 mgr->has_signed_vb_offset ? 0 : offset,
1115 size, 4, ptr + offset, &real_vb->buffer_offset,
1116 &real_vb->buffer.resource);
1117 if (!real_vb->buffer.resource)
1118 return PIPE_ERROR_OUT_OF_MEMORY;
1119
1120 real_vb->buffer_offset -= offset;
1121 }
1122 return PIPE_OK;
1123 }
1124
1125 unsigned start_offset[PIPE_MAX_ATTRIBS];
1126 unsigned end_offset[PIPE_MAX_ATTRIBS];
1127 uint32_t buffer_mask = 0;
1128
1129 /* Slower path supporting interleaved vertex attribs using 2 loops. */
1130 /* Determine how much data needs to be uploaded. */
1131 for (i = 0; i < nr_velems; i++) {
1132 const struct pipe_vertex_element *velem = &velems[i];
1133 unsigned index = velem->vertex_buffer_index;
1134 struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
1135 unsigned first, size, index_bit;
1136
1137 if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
1138 num_vertices, start_instance, num_instances,
1139 &first, &size))
1140 continue;
1141
1142 index_bit = 1 << index;
1143
1144 /* Update offsets. */
1145 if (!(buffer_mask & index_bit)) {
1146 start_offset[index] = first;
1147 end_offset[index] = first + size;
1148 } else {
1149 if (first < start_offset[index])
1150 start_offset[index] = first;
1151 if (first + size > end_offset[index])
1152 end_offset[index] = first + size;
1153 }
1154
1155 buffer_mask |= index_bit;
1156 }
1157
1158 /* Upload buffers. */
1159 while (buffer_mask) {
1160 unsigned start, end;
1161 struct pipe_vertex_buffer *real_vb;
1162 const uint8_t *ptr;
1163
1164 i = u_bit_scan(&buffer_mask);
1165
1166 start = start_offset[i];
1167 end = end_offset[i];
1168 assert(start < end);
1169
1170 real_vb = &mgr->real_vertex_buffer[i];
1171 ptr = mgr->vertex_buffer[i].buffer.user;
1172
1173 u_upload_data(mgr->pipe->stream_uploader,
1174 mgr->has_signed_vb_offset ? 0 : start,
1175 end - start, 4,
1176 ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
1177 if (!real_vb->buffer.resource)
1178 return PIPE_ERROR_OUT_OF_MEMORY;
1179
1180 real_vb->buffer_offset -= start;
1181 }
1182
1183 return PIPE_OK;
1184 }
1185
1186 static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
1187 {
1188 /* See if there are any per-vertex attribs which will be uploaded or
1189 * translated. Use bitmasks to get the info instead of looping over vertex
1190 * elements. */
1191 return (mgr->ve->used_vb_mask &
1192 ((mgr->user_vb_mask |
1193 mgr->incompatible_vb_mask |
1194 mgr->ve->incompatible_vb_mask_any) &
1195 mgr->ve->noninstance_vb_mask_any &
1196 mgr->nonzero_stride_vb_mask)) != 0;
1197 }
1198
1199 static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
1200 {
1201 /* Return true if there are hw buffers which don't need to be translated.
1202 *
1203 * We could query whether each buffer is busy, but that would
1204 * be way more costly than this. */
1205 return (mgr->ve->used_vb_mask &
1206 (~mgr->user_vb_mask &
1207 ~mgr->incompatible_vb_mask &
1208 mgr->ve->compatible_vb_mask_all &
1209 mgr->ve->noninstance_vb_mask_any &
1210 mgr->nonzero_stride_vb_mask)) != 0;
1211 }
1212
1213 static void
1214 u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
1215 unsigned count,
1216 const void *indices, unsigned *out_min_index,
1217 unsigned *out_max_index)
1218 {
1219 if (!count) {
1220 *out_min_index = 0;
1221 *out_max_index = 0;
1222 return;
1223 }
1224
1225 switch (info->index_size) {
1226 case 4: {
1227 const unsigned *ui_indices = (const unsigned*)indices;
1228 unsigned max = 0;
1229 unsigned min = ~0u;
1230 if (info->primitive_restart) {
1231 for (unsigned i = 0; i < count; i++) {
1232 if (ui_indices[i] != info->restart_index) {
1233 if (ui_indices[i] > max) max = ui_indices[i];
1234 if (ui_indices[i] < min) min = ui_indices[i];
1235 }
1236 }
1237 }
1238 else {
1239 for (unsigned i = 0; i < count; i++) {
1240 if (ui_indices[i] > max) max = ui_indices[i];
1241 if (ui_indices[i] < min) min = ui_indices[i];
1242 }
1243 }
1244 *out_min_index = min;
1245 *out_max_index = max;
1246 break;
1247 }
1248 case 2: {
1249 const unsigned short *us_indices = (const unsigned short*)indices;
1250 unsigned short max = 0;
1251 unsigned short min = ~((unsigned short)0);
1252 if (info->primitive_restart) {
1253 for (unsigned i = 0; i < count; i++) {
1254 if (us_indices[i] != info->restart_index) {
1255 if (us_indices[i] > max) max = us_indices[i];
1256 if (us_indices[i] < min) min = us_indices[i];
1257 }
1258 }
1259 }
1260 else {
1261 for (unsigned i = 0; i < count; i++) {
1262 if (us_indices[i] > max) max = us_indices[i];
1263 if (us_indices[i] < min) min = us_indices[i];
1264 }
1265 }
1266 *out_min_index = min;
1267 *out_max_index = max;
1268 break;
1269 }
1270 case 1: {
1271 const unsigned char *ub_indices = (const unsigned char*)indices;
1272 unsigned char max = 0;
1273 unsigned char min = ~((unsigned char)0);
1274 if (info->primitive_restart) {
1275 for (unsigned i = 0; i < count; i++) {
1276 if (ub_indices[i] != info->restart_index) {
1277 if (ub_indices[i] > max) max = ub_indices[i];
1278 if (ub_indices[i] < min) min = ub_indices[i];
1279 }
1280 }
1281 }
1282 else {
1283 for (unsigned i = 0; i < count; i++) {
1284 if (ub_indices[i] > max) max = ub_indices[i];
1285 if (ub_indices[i] < min) min = ub_indices[i];
1286 }
1287 }
1288 *out_min_index = min;
1289 *out_max_index = max;
1290 break;
1291 }
1292 default:
1293 unreachable("bad index size");
1294 }
1295 }
1296
1297 void u_vbuf_get_minmax_index(struct pipe_context *pipe,
1298 const struct pipe_draw_info *info,
1299 const struct pipe_draw_start_count_bias *draw,
1300 unsigned *out_min_index, unsigned *out_max_index)
1301 {
1302 struct pipe_transfer *transfer = NULL;
1303 const void *indices;
1304
1305 if (info->has_user_indices) {
1306 indices = (uint8_t*)info->index.user +
1307 draw->start * info->index_size;
1308 } else {
1309 indices = pipe_buffer_map_range(pipe, info->index.resource,
1310 draw->start * info->index_size,
1311 draw->count * info->index_size,
1312 PIPE_MAP_READ, &transfer);
1313 }
1314
1315 u_vbuf_get_minmax_index_mapped(info, draw->count, indices,
1316 out_min_index, out_max_index);
1317
1318 if (transfer) {
1319 pipe_buffer_unmap(pipe, transfer);
1320 }
1321 }
1322
1323 static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
1324 {
1325 struct pipe_context *pipe = mgr->pipe;
1326 unsigned start_slot, count;
1327
1328 start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
1329 count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
1330
1331 if (mgr->dirty_real_vb_mask == mgr->enabled_vb_mask &&
1332 mgr->dirty_real_vb_mask == mgr->user_vb_mask) {
1333 /* Fast path that allows us to transfer the VBO references to the driver
1334 * to skip atomic reference counting there. These are freshly uploaded
1335 * user buffers that can be discarded after this call.
1336 */
1337 pipe->set_vertex_buffers(pipe, start_slot, count, 0, true,
1338 mgr->real_vertex_buffer + start_slot);
1339
1340 /* We don't own the VBO references now. Set them to NULL. */
1341 for (unsigned i = 0; i < count; i++) {
1342 assert(!mgr->real_vertex_buffer[start_slot + i].is_user_buffer);
1343 mgr->real_vertex_buffer[start_slot + i].buffer.resource = NULL;
1344 }
1345 } else {
1346 /* Slow path where we have to keep VBO references. */
1347 pipe->set_vertex_buffers(pipe, start_slot, count, 0, false,
1348 mgr->real_vertex_buffer + start_slot);
1349 }
1350 mgr->dirty_real_vb_mask = 0;
1351 }
1352
1353 static void
1354 u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
1355 unsigned drawid_offset,
1356 unsigned *indirect_data, unsigned stride,
1357 unsigned draw_count)
1358 {
1359 /* Increase refcount to be able to use take_index_buffer_ownership with
1360 * all draws.
1361 */
1362 if (draw_count > 1 && info->take_index_buffer_ownership)
1363 p_atomic_add(&info->index.resource->reference.count, draw_count - 1);
1364
1365 assert(info->index_size);
1366
1367 for (unsigned i = 0; i < draw_count; i++) {
1368 struct pipe_draw_start_count_bias draw;
1369 unsigned offset = i * stride / 4;
1370
1371 draw.count = indirect_data[offset + 0];
1372 info->instance_count = indirect_data[offset + 1];
1373 draw.start = indirect_data[offset + 2];
1374 draw.index_bias = indirect_data[offset + 3];
1375 info->start_instance = indirect_data[offset + 4];
1376
1377 u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, draw);
1378 }
1379 }
1380
1381 void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
1382 unsigned drawid_offset,
1383 const struct pipe_draw_indirect_info *indirect,
1384 const struct pipe_draw_start_count_bias draw)
1385 {
1386 struct pipe_context *pipe = mgr->pipe;
1387 int start_vertex;
1388 unsigned min_index;
1389 unsigned num_vertices;
1390 boolean unroll_indices = FALSE;
1391 const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
1392 uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
1393 const uint32_t incompatible_vb_mask =
1394 mgr->incompatible_vb_mask & used_vb_mask;
1395 struct pipe_draw_info new_info;
1396 struct pipe_draw_start_count_bias new_draw;
1397 unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;
1398
1399 /* Normal draw. No fallback and no user buffers. */
1400 if (!incompatible_vb_mask &&
1401 !mgr->ve->incompatible_elem_mask &&
1402 !user_vb_mask &&
1403 (info->index_size != 1 || !mgr->caps.rewrite_ubyte_ibs) &&
1404 (!info->primitive_restart ||
1405 info->restart_index == fixed_restart_index ||
1406 !mgr->caps.rewrite_restart_index) &&
1407 (!info->primitive_restart || mgr->caps.supported_restart_modes & BITFIELD_BIT(info->mode)) &&
1408 mgr->caps.supported_prim_modes & BITFIELD_BIT(info->mode)) {
1409
1410 /* Set vertex buffers if needed. */
1411 if (mgr->dirty_real_vb_mask & used_vb_mask) {
1412 u_vbuf_set_driver_vertex_buffers(mgr);
1413 }
1414
1415 pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1);
1416 return;
1417 }
1418
1419 new_info = *info;
1420 new_draw = draw;
1421
1422 /* Handle indirect (multi)draws. */
1423 if (indirect && indirect->buffer) {
1424 unsigned draw_count = 0;
1425
1426 /* Get the number of draws. */
1427 if (indirect->indirect_draw_count) {
1428 pipe_buffer_read(pipe, indirect->indirect_draw_count,
1429 indirect->indirect_draw_count_offset,
1430 4, &draw_count);
1431 } else {
1432 draw_count = indirect->draw_count;
1433 }
1434
1435 if (!draw_count)
1436 goto cleanup;
1437
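/* Each indirect draw record is 5 dwords (20 bytes) when indexed
 * (count, instance_count, start, index_bias, start_instance) and
 * 4 dwords (16 bytes) when non-indexed; records are 'stride' bytes
 * apart, so only the last record contributes its full size. */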
1438 unsigned data_size = (draw_count - 1) * indirect->stride +
1439 (new_info.index_size ? 20 : 16);
1440 unsigned *data = malloc(data_size);
1441 if (!data)
1442 goto cleanup; /* report an error? */
1443
1444 /* Read the used buffer range only once, because the read can be
1445 * uncached.
1446 */
1447 pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
1448 data);
1449
1450 if (info->index_size) {
1451 /* Indexed multidraw. */
1452 unsigned index_bias0 = data[3];
1453 bool index_bias_same = true;
1454
1455 /* If we invoke the translate path, we have to split the multidraw. */
1456 if (incompatible_vb_mask ||
1457 mgr->ve->incompatible_elem_mask) {
1458 u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
1459 indirect->stride, draw_count);
1460 free(data);
1461 return;
1462 }
1463
1464 /* See if index_bias is the same for all draws. */
1465 for (unsigned i = 1; i < draw_count; i++) {
1466 if (data[i * indirect->stride / 4 + 3] != index_bias0) {
1467 index_bias_same = false;
1468 break;
1469 }
1470 }
1471
1472 /* Split the multidraw if index_bias is different. */
1473 if (!index_bias_same) {
1474 u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
1475 indirect->stride, draw_count);
1476 free(data);
1477 return;
1478 }
1479
1480 /* If we don't need to use the translate path and index_bias is
1481 * the same, we can process the multidraw with the time complexity
1482 * equal to 1 draw call (except for the index range computation).
1483 * We only need to compute the index range covering all draw calls
1484 * of the multidraw.
1485 *
1486 * The driver will not look at these values because indirect != NULL.
1487 * These values determine the user buffer bounds to upload.
1488 */
1489 new_draw.index_bias = index_bias0;
1490 new_info.index_bounds_valid = true;
1491 new_info.min_index = ~0u;
1492 new_info.max_index = 0;
1493 new_info.start_instance = ~0u;
1494 unsigned end_instance = 0;
1495
1496 struct pipe_transfer *transfer = NULL;
1497 const uint8_t *indices;
1498
1499 if (info->has_user_indices) {
1500 indices = (uint8_t*)info->index.user;
1501 } else {
1502 indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
1503 PIPE_MAP_READ, &transfer);
1504 }
1505
1506 for (unsigned i = 0; i < draw_count; i++) {
1507 unsigned offset = i * indirect->stride / 4;
1508 unsigned start = data[offset + 2];
1509 unsigned count = data[offset + 0];
1510 unsigned start_instance = data[offset + 4];
1511 unsigned instance_count = data[offset + 1];
1512
1513 if (!count || !instance_count)
1514 continue;
1515
1516 /* Update the ranges of instances. */
1517 new_info.start_instance = MIN2(new_info.start_instance,
1518 start_instance);
1519 end_instance = MAX2(end_instance, start_instance + instance_count);
1520
1521 /* Update the index range. */
1522 unsigned min, max;
1523 u_vbuf_get_minmax_index_mapped(&new_info, count,
1524 indices +
1525 new_info.index_size * start,
1526 &min, &max);
1527
1528 new_info.min_index = MIN2(new_info.min_index, min);
1529 new_info.max_index = MAX2(new_info.max_index, max);
1530 }
1531 free(data);
1532
1533 if (transfer)
1534 pipe_buffer_unmap(pipe, transfer);
1535
1536 /* Set the final instance count. */
1537 new_info.instance_count = end_instance - new_info.start_instance;
1538
1539 if (new_info.start_instance == ~0u || !new_info.instance_count)
1540 goto cleanup;
1541 } else {
1542 /* Non-indexed multidraw.
1543 *
1544 * Keep the draw call indirect and compute minimums & maximums,
1545 * which will determine the user buffer bounds to upload, but
1546 * the driver will not look at these values because indirect != NULL.
1547 *
1548 * This efficiently processes the multidraw with the time complexity
1549 * equal to 1 draw call.
1550 */
1551 new_draw.start = ~0u;
1552 new_info.start_instance = ~0u;
1553 unsigned end_vertex = 0;
1554 unsigned end_instance = 0;
1555
1556 for (unsigned i = 0; i < draw_count; i++) {
1557 unsigned offset = i * indirect->stride / 4;
1558 unsigned start = data[offset + 2];
1559 unsigned count = data[offset + 0];
1560 unsigned start_instance = data[offset + 3];
1561 unsigned instance_count = data[offset + 1];
1562
1563 new_draw.start = MIN2(new_draw.start, start);
1564 new_info.start_instance = MIN2(new_info.start_instance,
1565 start_instance);
1566
1567 end_vertex = MAX2(end_vertex, start + count);
1568 end_instance = MAX2(end_instance, start_instance + instance_count);
1569 }
1570 free(data);
1571
1572 /* Set the final counts. */
1573 new_draw.count = end_vertex - new_draw.start;
1574 new_info.instance_count = end_instance - new_info.start_instance;
1575
1576 if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count)
1577 goto cleanup;
1578 }
1579 } else {
1580 if ((!indirect && !new_draw.count) || !new_info.instance_count)
1581 goto cleanup;
1582 }
1583
1584 if (new_info.index_size) {
1585 /* See if anything needs to be done for per-vertex attribs. */
1586 if (u_vbuf_need_minmax_index(mgr)) {
1587 unsigned max_index;
1588
1589 if (new_info.index_bounds_valid) {
1590 min_index = new_info.min_index;
1591 max_index = new_info.max_index;
1592 } else {
1593 u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw,
1594 &min_index, &max_index);
1595 }
1596
1597 assert(min_index <= max_index);
1598
1599 start_vertex = min_index + new_draw.index_bias;
1600 num_vertices = max_index + 1 - min_index;
1601
1602 /* Primitive restart doesn't work when unrolling indices.
1603 * We would have to break this drawing operation into several ones. */
1604 /* Use some heuristic to see if unrolling indices improves
1605 * performance. */
1606 if (!indirect &&
1607 !new_info.primitive_restart &&
1608 util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
1609 !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
1610 unroll_indices = TRUE;
1611 user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
1612 mgr->ve->noninstance_vb_mask_any);
1613 }
1614 } else {
1615 /* Nothing to do for per-vertex attribs. */
1616 start_vertex = 0;
1617 num_vertices = 0;
1618 min_index = 0;
1619 }
1620 } else {
1621 start_vertex = new_draw.start;
1622 num_vertices = new_draw.count;
1623 min_index = 0;
1624 }
1625
1626 /* Translate vertices with non-native layouts or formats. */
1627 if (unroll_indices ||
1628 incompatible_vb_mask ||
1629 mgr->ve->incompatible_elem_mask) {
1630 if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
1631 start_vertex, num_vertices,
1632 min_index, unroll_indices)) {
1633 debug_warn_once("u_vbuf_translate_begin() failed");
1634 goto cleanup;
1635 }
1636
1637 if (unroll_indices) {
1638 new_info.index_size = 0;
1639 new_draw.index_bias = 0;
1640 new_info.index_bounds_valid = true;
1641 new_info.min_index = 0;
1642 new_info.max_index = new_draw.count - 1;
1643 new_draw.start = 0;
1644 }
1645
1646 user_vb_mask &= ~(incompatible_vb_mask |
1647 mgr->ve->incompatible_vb_mask_all);
1648 }
1649
1650 /* Upload user buffers. */
1651 if (user_vb_mask) {
1652 if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
1653 new_info.start_instance,
1654 new_info.instance_count) != PIPE_OK) {
1655 debug_warn_once("u_vbuf_upload_buffers() failed");
1656 goto cleanup;
1657 }
1658
1659 mgr->dirty_real_vb_mask |= user_vb_mask;
1660 }
1661
1662 /*
1663 if (unroll_indices) {
1664 printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
1665 start_vertex, num_vertices);
1666 util_dump_draw_info(stdout, info);
1667 printf("\n");
1668 }
1669
1670 unsigned i;
1671 for (i = 0; i < mgr->nr_vertex_buffers; i++) {
1672 printf("input %i: ", i);
1673 util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
1674 printf("\n");
1675 }
1676 for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
1677 printf("real %i: ", i);
1678 util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
1679 printf("\n");
1680 }
1681 */
1682
1683 u_upload_unmap(pipe->stream_uploader);
1684 if (mgr->dirty_real_vb_mask)
1685 u_vbuf_set_driver_vertex_buffers(mgr);
1686
1687 if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) ||
1688 (new_info.primitive_restart &&
1689 ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) ||
1690 !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) ||
1691 !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) {
1692 util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first);
1693 util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1);
1694 } else
1695 pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1);
1696
1697 if (mgr->using_translate) {
1698 u_vbuf_translate_end(mgr);
1699 }
1700 return;
1701
1702 cleanup:
1703 if (info->take_index_buffer_ownership) {
1704 struct pipe_resource *indexbuf = info->index.resource;
1705 pipe_resource_reference(&indexbuf, NULL);
1706 }
1707 }
1708
1709 void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
1710 {
1711 assert(!mgr->ve_saved);
1712 mgr->ve_saved = mgr->ve;
1713 }
1714
1715 void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
1716 {
1717 if (mgr->ve != mgr->ve_saved) {
1718 struct pipe_context *pipe = mgr->pipe;
1719
1720 mgr->ve = mgr->ve_saved;
1721 pipe->bind_vertex_elements_state(pipe,
1722 mgr->ve ? mgr->ve->driver_cso : NULL);
1723 }
1724 mgr->ve_saved = NULL;
1725 }
1726