1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keithw@vmware.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include <stdio.h>
33
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
41 #include "vbo/vbo.h"
42
43 #include "t_split.h"
44 #include "tnl.h"
45
46
47 #define ELT_TABLE_SIZE 16
48
49 /**
50 * Used for vertex-level splitting of indexed buffers. Note that
51 * non-indexed primitives may be converted to indexed in some cases
52 * (eg loops, fans) in order to use this splitting path.
53 */
54 struct copy_context {
55 struct gl_context *ctx;
56 const struct tnl_vertex_array *array;
57 const struct _mesa_prim *prim;
58 GLuint nr_prims;
59 const struct _mesa_index_buffer *ib;
60 tnl_draw_func draw;
61
62 const struct split_limits *limits;
63
64 struct {
65 GLuint attr;
66 GLuint size;
67 const struct tnl_vertex_array *array;
68 const GLubyte *src_ptr;
69
70 struct gl_vertex_buffer_binding dstbinding;
71 struct gl_array_attributes dstattribs;
72
73 } varying[VERT_ATTRIB_MAX];
74 GLuint nr_varying;
75
76 struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
77 struct _mesa_index_buffer dstib;
78
79 GLuint *translated_elt_buf;
80 const GLuint *srcelt;
81
82 /** A baby hash table to avoid re-emitting (some) duplicate
83 * vertices when splitting indexed primitives.
84 */
85 struct {
86 GLuint in;
87 GLuint out;
88 } vert_cache[ELT_TABLE_SIZE];
89
90 GLuint vertex_size;
91 GLubyte *dstbuf;
92 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */
93 GLuint dstbuf_size; /**< in vertices */
94 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value
95 * in dstelt. Our MaxIndex.
96 */
97
98 GLuint *dstelt;
99 GLuint dstelt_nr;
100 GLuint dstelt_size;
101
102 #define MAX_PRIM 32
103 struct _mesa_prim dstprim[MAX_PRIM];
104 GLuint dstprim_nr;
105 };
106
107
108 /**
109 * Shallow copy one vertex array to another.
110 */
111 static inline void
copy_vertex_array(struct tnl_vertex_array * dst,const struct tnl_vertex_array * src)112 copy_vertex_array(struct tnl_vertex_array *dst,
113 const struct tnl_vertex_array *src)
114 {
115 dst->VertexAttrib = src->VertexAttrib;
116 dst->BufferBinding = src->BufferBinding;
117 }
118
119
120 /**
121 * Starts returning true slightly before the buffer fills, to ensure
122 * that there is sufficient room for any remaining vertices to finish
123 * off the prim:
124 */
125 static GLboolean
check_flush(struct copy_context * copy)126 check_flush(struct copy_context *copy)
127 {
128 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
129
130 if (GL_TRIANGLE_STRIP == mode &&
131 copy->dstelt_nr & 1) { /* see bug9962 */
132 return GL_FALSE;
133 }
134
135 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
136 return GL_TRUE;
137
138 if (copy->dstelt_nr + 4 > copy->dstelt_size)
139 return GL_TRUE;
140
141 return GL_FALSE;
142 }
143
144
145 /**
146 * Dump the parameters/info for a vbo->draw() call.
147 */
148 static void
dump_draw_info(const struct tnl_vertex_array * arrays,const struct _mesa_prim * prims,GLuint nr_prims,const struct _mesa_index_buffer * ib)149 dump_draw_info(const struct tnl_vertex_array *arrays,
150 const struct _mesa_prim *prims,
151 GLuint nr_prims,
152 const struct _mesa_index_buffer *ib)
153 {
154 GLuint i, j;
155
156 printf("VBO Draw:\n");
157 for (i = 0; i < nr_prims; i++) {
158 printf("Prim %u of %u\n", i, nr_prims);
159 printf(" Prim mode 0x%x\n", prims[i].mode);
160 printf(" IB: %p\n", (void*) ib);
161 for (j = 0; j < VERT_ATTRIB_MAX; j++) {
162 const struct tnl_vertex_array *array = &arrays[j];
163 const struct gl_vertex_buffer_binding *binding
164 = array->BufferBinding;
165 const struct gl_array_attributes *attrib = array->VertexAttrib;
166 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
167 printf(" array %d at %p:\n", j, (void*) &arrays[j]);
168 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
169 ptr, attrib->Format.Size, attrib->Format.Type, binding->Stride);
170 if (0) {
171 GLint k = prims[i].start + prims[i].count - 1;
172 GLfloat *last = (GLfloat *) (ptr + binding->Stride * k);
173 printf(" last: %f %f %f\n",
174 last[0], last[1], last[2]);
175 }
176 }
177 }
178 }
179
180
181 static void
flush(struct copy_context * copy)182 flush(struct copy_context *copy)
183 {
184 struct gl_context *ctx = copy->ctx;
185 GLuint i;
186
187 /* Set some counters:
188 */
189 copy->dstib.count = copy->dstelt_nr;
190
191 #if 0
192 dump_draw_info(copy->dstarray,
193 copy->dstprim,
194 copy->dstprim_nr,
195 ©->dstib);
196 #else
197 (void) dump_draw_info;
198 #endif
199
200 copy->draw(ctx,
201 copy->dstarray,
202 copy->dstprim,
203 copy->dstprim_nr,
204 ©->dstib,
205 GL_TRUE,
206 0,
207 copy->dstbuf_nr - 1,
208 1,
209 0);
210
211 /* Reset all pointers:
212 */
213 copy->dstprim_nr = 0;
214 copy->dstelt_nr = 0;
215 copy->dstbuf_nr = 0;
216 copy->dstptr = copy->dstbuf;
217
218 /* Clear the vertex cache:
219 */
220 for (i = 0; i < ELT_TABLE_SIZE; i++)
221 copy->vert_cache[i].in = ~0;
222 }
223
224
225 /**
226 * Called at begin of each primitive during replay.
227 */
228 static void
begin(struct copy_context * copy,GLenum mode,GLboolean begin_flag)229 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
230 {
231 struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr];
232
233 prim->mode = mode;
234 prim->begin = begin_flag;
235 }
236
237
238 /**
239 * Use a hashtable to attempt to identify recently-emitted vertices
240 * and avoid re-emitting them.
241 */
242 static GLuint
elt(struct copy_context * copy,GLuint elt_idx)243 elt(struct copy_context *copy, GLuint elt_idx)
244 {
245 GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
246 GLuint slot = elt & (ELT_TABLE_SIZE-1);
247
248 /* Look up the incoming element in the vertex cache. Re-emit if
249 * necessary.
250 */
251 if (copy->vert_cache[slot].in != elt) {
252 GLubyte *csr = copy->dstptr;
253 GLuint i;
254
255 for (i = 0; i < copy->nr_varying; i++) {
256 const struct tnl_vertex_array *srcarray = copy->varying[i].array;
257 const struct gl_vertex_buffer_binding* srcbinding
258 = srcarray->BufferBinding;
259 const GLubyte *srcptr
260 = copy->varying[i].src_ptr + elt * srcbinding->Stride;
261
262 memcpy(csr, srcptr, copy->varying[i].size);
263 csr += copy->varying[i].size;
264
265 #ifdef NAN_CHECK
266 if (srcarray->Format.Type == GL_FLOAT) {
267 GLuint k;
268 GLfloat *f = (GLfloat *) srcptr;
269 for (k = 0; k < srcarray->Size; k++) {
270 assert(!util_is_inf_or_nan(f[k]));
271 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
272 }
273 }
274 #endif
275
276 if (0) {
277 const GLuint *f = (const GLuint *)srcptr;
278 GLuint j;
279 printf(" varying %d: ", i);
280 for (j = 0; j < copy->varying[i].size / 4; j++)
281 printf("%x ", f[j]);
282 printf("\n");
283 }
284 }
285
286 copy->vert_cache[slot].in = elt;
287 copy->vert_cache[slot].out = copy->dstbuf_nr++;
288 copy->dstptr += copy->vertex_size;
289
290 assert(csr == copy->dstptr);
291 assert(copy->dstptr == (copy->dstbuf +
292 copy->dstbuf_nr * copy->vertex_size));
293 }
294
295 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
296 return check_flush(copy);
297 }
298
299
300 /**
301 * Called at end of each primitive during replay.
302 */
303 static void
end(struct copy_context * copy,GLboolean end_flag)304 end(struct copy_context *copy, GLboolean end_flag)
305 {
306 struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr];
307
308 prim->end = end_flag;
309 prim->count = copy->dstelt_nr - prim->start;
310
311 if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
312 flush(copy);
313 }
314 }
315
316
317 static void
replay_elts(struct copy_context * copy)318 replay_elts(struct copy_context *copy)
319 {
320 GLuint i, j, k;
321 GLboolean split;
322
323 for (i = 0; i < copy->nr_prims; i++) {
324 const struct _mesa_prim *prim = ©->prim[i];
325 const GLuint start = prim->start;
326 GLuint first, incr;
327
328 switch (prim->mode) {
329 case GL_LINE_LOOP:
330 /* Convert to linestrip and emit the final vertex explicitly,
331 * but only in the resultant strip that requires it.
332 */
333 j = 0;
334 while (j != prim->count) {
335 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
336
337 for (split = GL_FALSE; j != prim->count && !split; j++)
338 split = elt(copy, start + j);
339
340 if (j == prim->count) {
341 /* Done, emit final line. Split doesn't matter as
342 * it is always raised a bit early so we can emit
343 * the last verts if necessary!
344 */
345 if (prim->end)
346 (void)elt(copy, start + 0);
347
348 end(copy, prim->end);
349 }
350 else {
351 /* Wrap
352 */
353 assert(split);
354 end(copy, 0);
355 j--;
356 }
357 }
358 break;
359
360 case GL_TRIANGLE_FAN:
361 case GL_POLYGON:
362 j = 2;
363 while (j != prim->count) {
364 begin(copy, prim->mode, prim->begin && j == 0);
365
366 split = elt(copy, start+0);
367 assert(!split);
368
369 split = elt(copy, start+j-1);
370 assert(!split);
371
372 for (; j != prim->count && !split; j++)
373 split = elt(copy, start+j);
374
375 end(copy, prim->end && j == prim->count);
376
377 if (j != prim->count) {
378 /* Wrapped the primitive, need to repeat some vertices:
379 */
380 j -= 1;
381 }
382 }
383 break;
384
385 default:
386 (void)_tnl_split_prim_inplace(prim->mode, &first, &incr);
387
388 j = 0;
389 while (j != prim->count) {
390
391 begin(copy, prim->mode, prim->begin && j == 0);
392
393 split = 0;
394 for (k = 0; k < first; k++, j++)
395 split |= elt(copy, start+j);
396
397 assert(!split);
398
399 for (; j != prim->count && !split;)
400 for (k = 0; k < incr; k++, j++)
401 split |= elt(copy, start+j);
402
403 end(copy, prim->end && j == prim->count);
404
405 if (j != prim->count) {
406 /* Wrapped the primitive, need to repeat some vertices:
407 */
408 assert(j > first - incr);
409 j -= (first - incr);
410 }
411 }
412 break;
413 }
414 }
415
416 if (copy->dstprim_nr)
417 flush(copy);
418 }
419
420
421 static void
replay_init(struct copy_context * copy)422 replay_init(struct copy_context *copy)
423 {
424 struct gl_context *ctx = copy->ctx;
425 GLuint i;
426 GLuint offset;
427 const GLvoid *srcptr;
428
429 /* Make a list of varying attributes and their vbo's. Also
430 * calculate vertex size.
431 */
432 copy->vertex_size = 0;
433 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
434 const struct tnl_vertex_array *array = ©->array[i];
435 const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
436
437 if (binding->Stride == 0) {
438 copy_vertex_array(©->dstarray[i], array);
439 }
440 else {
441 const struct gl_array_attributes *attrib = array->VertexAttrib;
442 struct gl_buffer_object *vbo = binding->BufferObj;
443 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
444 GLuint j = copy->nr_varying++;
445
446 copy->varying[j].attr = i;
447 copy->varying[j].array = ©->array[i];
448 copy->varying[j].size = attrib->Format._ElementSize;
449 copy->vertex_size += attrib->Format._ElementSize;
450
451 if (vbo) {
452 if (!_mesa_bufferobj_mapped(vbo, MAP_INTERNAL)) {
453 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
454 MAP_INTERNAL);
455 }
456
457 copy->varying[j].src_ptr =
458 ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer, ptr);
459 } else {
460 copy->varying[j].src_ptr = ptr;
461 }
462
463 copy->dstarray[i].VertexAttrib = ©->varying[j].dstattribs;
464 copy->dstarray[i].BufferBinding = ©->varying[j].dstbinding;
465 }
466 }
467
468 /* There must always be an index buffer. Currently require the
469 * caller convert non-indexed prims to indexed. Could alternately
470 * do it internally.
471 */
472 if (copy->ib->obj) {
473 if (!_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
474 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
475 copy->ib->obj, MAP_INTERNAL);
476
477 srcptr = (const GLubyte *)
478 ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
479 copy->ib->ptr);
480 } else
481 srcptr = copy->ib->ptr;
482
483 switch (copy->ib->index_size_shift) {
484 case 0:
485 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
486 copy->srcelt = copy->translated_elt_buf;
487
488 for (i = 0; i < copy->ib->count; i++)
489 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
490 break;
491
492 case 1:
493 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
494 copy->srcelt = copy->translated_elt_buf;
495
496 for (i = 0; i < copy->ib->count; i++)
497 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
498 break;
499
500 case 2:
501 copy->translated_elt_buf = NULL;
502 copy->srcelt = (const GLuint *)srcptr;
503 break;
504 }
505
506 /* Figure out the maximum allowed vertex buffer size:
507 */
508 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
509 copy->dstbuf_size = copy->limits->max_verts;
510 }
511 else {
512 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
513 }
514
515 /* Allocate an output vertex buffer:
516 *
517 * XXX: This should be a VBO!
518 */
519 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
520 copy->dstptr = copy->dstbuf;
521
522 /* Setup new vertex arrays to point into the output buffer:
523 */
524 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
525 const struct tnl_vertex_array *src = copy->varying[i].array;
526 const struct gl_array_attributes *srcattr = src->VertexAttrib;
527 struct tnl_vertex_array *dst = ©->dstarray[copy->varying[i].attr];
528 struct gl_vertex_buffer_binding *dstbind = ©->varying[i].dstbinding;
529 struct gl_array_attributes *dstattr = ©->varying[i].dstattribs;
530
531 dstattr->Format = srcattr->Format;
532 dstattr->Ptr = copy->dstbuf + offset;
533 dstbind->Stride = copy->vertex_size;
534 dstbind->BufferObj = NULL;
535 dst->BufferBinding = dstbind;
536 dst->VertexAttrib = dstattr;
537
538 offset += copy->varying[i].size;
539 }
540
541 /* Allocate an output element list:
542 */
543 copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
544 copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
545 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
546 copy->dstelt_nr = 0;
547
548 /* Setup the new index buffer to point to the allocated element
549 * list:
550 */
551 copy->dstib.count = 0; /* duplicates dstelt_nr */
552 copy->dstib.index_size_shift = 2;
553 copy->dstib.obj = NULL;
554 copy->dstib.ptr = copy->dstelt;
555 }
556
557
558 /**
559 * Free up everything allocated during split/replay.
560 */
561 static void
replay_finish(struct copy_context * copy)562 replay_finish(struct copy_context *copy)
563 {
564 struct gl_context *ctx = copy->ctx;
565 GLuint i;
566
567 /* Free our vertex and index buffers */
568 free(copy->translated_elt_buf);
569 free(copy->dstbuf);
570 free(copy->dstelt);
571
572 /* Unmap VBO's */
573 for (i = 0; i < copy->nr_varying; i++) {
574 struct gl_buffer_object *vbo =
575 copy->varying[i].array->BufferBinding->BufferObj;
576 if (vbo && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
577 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
578 }
579
580 /* Unmap index buffer */
581 if (copy->ib->obj &&
582 _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
583 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
584 }
585 }
586
587
588 /**
589 * Split VBO into smaller pieces, draw the pieces.
590 */
591 void
_tnl_split_copy(struct gl_context * ctx,const struct tnl_vertex_array * arrays,const struct _mesa_prim * prim,GLuint nr_prims,const struct _mesa_index_buffer * ib,tnl_draw_func draw,const struct split_limits * limits)592 _tnl_split_copy(struct gl_context *ctx,
593 const struct tnl_vertex_array *arrays,
594 const struct _mesa_prim *prim,
595 GLuint nr_prims,
596 const struct _mesa_index_buffer *ib,
597 tnl_draw_func draw,
598 const struct split_limits *limits)
599 {
600 struct copy_context copy;
601 GLuint i, this_nr_prims;
602
603 for (i = 0; i < nr_prims;) {
604 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
605 * will rebase the elements to the basevertex, and we'll only
606 * emit strings of prims with the same basevertex in one draw call.
607 */
608 for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
609 this_nr_prims++) {
610 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
611 break;
612 }
613
614 memset(©, 0, sizeof(copy));
615
616 /* Require indexed primitives:
617 */
618 assert(ib);
619
620 copy.ctx = ctx;
621 copy.array = arrays;
622 copy.prim = &prim[i];
623 copy.nr_prims = this_nr_prims;
624 copy.ib = ib;
625 copy.draw = draw;
626 copy.limits = limits;
627
628 /* Clear the vertex cache:
629 */
630 for (i = 0; i < ELT_TABLE_SIZE; i++)
631 copy.vert_cache[i].in = ~0;
632
633 replay_init(©);
634 replay_elts(©);
635 replay_finish(©);
636 }
637 }
638