1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36 #include "util/u_memory.h"
37
38
/**
 * Lookup key for one cached min/max result.
 *
 * The key is hashed over its raw bytes (sizeof(struct minmax_cache_key)) and
 * compared field by field, so all three members take part in identifying an
 * entry.
 */
struct minmax_cache_key {
   /* Start of the scanned index range; callers in this file pass the
    * index pointer/offset value cast to GLintptr. */
   GLintptr offset;
   /* Number of indices covered by the range. */
   GLuint count;
   /* Size of a single index; the scanner in this file handles 1, 2 and 4. */
   unsigned index_size;
};
44
45
/**
 * One cached result: the index range that was scanned and the smallest and
 * largest index values found in it.
 *
 * Entries are heap-allocated; the embedded key doubles as the hash-table key,
 * so key and data share one allocation.
 */
struct minmax_cache_entry {
   /* Range this result belongs to; also used as the table key. */
   struct minmax_cache_key key;
   /* Smallest index value found in the range. */
   GLuint min;
   /* Largest index value found in the range. */
   GLuint max;
};
51
52
53 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)54 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
55 {
56 return _mesa_hash_data(key, sizeof(*key));
57 }
58
59
60 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)61 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
62 const struct minmax_cache_key *b)
63 {
64 return (a->offset == b->offset) && (a->count == b->count) &&
65 (a->index_size == b->index_size);
66 }
67
68
69 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)70 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
71 {
72 free(entry->data);
73 }
74
75
76 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)77 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
78 {
79 if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
80 USAGE_ATOMIC_COUNTER_BUFFER |
81 USAGE_SHADER_STORAGE_BUFFER |
82 USAGE_TRANSFORM_FEEDBACK_BUFFER |
83 USAGE_PIXEL_PACK_BUFFER |
84 USAGE_DISABLE_MINMAX_CACHE))
85 return GL_FALSE;
86
87 if ((bufferObj->Mappings[MAP_USER].AccessFlags &
88 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
89 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
90 return GL_FALSE;
91
92 return GL_TRUE;
93 }
94
95
96 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)97 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
98 {
99 _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
100 bufferObj->MinMaxCache = NULL;
101 }
102
103
104 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)105 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
106 unsigned index_size, GLintptr offset, GLuint count,
107 GLuint *min_index, GLuint *max_index)
108 {
109 GLboolean found = GL_FALSE;
110 struct minmax_cache_key key;
111 uint32_t hash;
112 struct hash_entry *result;
113
114 if (!bufferObj->MinMaxCache)
115 return GL_FALSE;
116 if (!vbo_use_minmax_cache(bufferObj))
117 return GL_FALSE;
118
119 simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
120
121 if (bufferObj->MinMaxCacheDirty) {
122 /* Disable the cache permanently for this BO if the number of hits
123 * is asymptotically less than the number of misses. This happens when
124 * applications use the BO for streaming.
125 *
126 * However, some initial optimism allows applications that interleave
127 * draw calls with glBufferSubData during warmup.
128 */
129 unsigned optimism = bufferObj->Size;
130 if (bufferObj->MinMaxCacheMissIndices > optimism &&
131 bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
132 bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
133 vbo_delete_minmax_cache(bufferObj);
134 goto out_disable;
135 }
136
137 _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
138 bufferObj->MinMaxCacheDirty = false;
139 goto out_invalidate;
140 }
141
142 key.index_size = index_size;
143 key.offset = offset;
144 key.count = count;
145 hash = vbo_minmax_cache_hash(&key);
146 result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
147 if (result) {
148 struct minmax_cache_entry *entry = result->data;
149 *min_index = entry->min;
150 *max_index = entry->max;
151 found = GL_TRUE;
152 }
153
154 out_invalidate:
155 if (found) {
156 /* The hit counter saturates so that we don't accidently disable the
157 * cache in a long-running program.
158 */
159 unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
160
161 if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
162 bufferObj->MinMaxCacheHitIndices = new_hit_count;
163 else
164 bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
165 } else {
166 bufferObj->MinMaxCacheMissIndices += count;
167 }
168
169 out_disable:
170 simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
171 return found;
172 }
173
174
175 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint min,GLuint max)176 vbo_minmax_cache_store(struct gl_context *ctx,
177 struct gl_buffer_object *bufferObj,
178 unsigned index_size, GLintptr offset, GLuint count,
179 GLuint min, GLuint max)
180 {
181 struct minmax_cache_entry *entry;
182 struct hash_entry *table_entry;
183 uint32_t hash;
184
185 if (!vbo_use_minmax_cache(bufferObj))
186 return;
187
188 simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
189
190 if (!bufferObj->MinMaxCache) {
191 bufferObj->MinMaxCache =
192 _mesa_hash_table_create(NULL,
193 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
194 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
195 if (!bufferObj->MinMaxCache)
196 goto out;
197 }
198
199 entry = MALLOC_STRUCT(minmax_cache_entry);
200 if (!entry)
201 goto out;
202
203 entry->key.offset = offset;
204 entry->key.count = count;
205 entry->key.index_size = index_size;
206 entry->min = min;
207 entry->max = max;
208 hash = vbo_minmax_cache_hash(&entry->key);
209
210 table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
211 hash, &entry->key);
212 if (table_entry) {
213 /* It seems like this could happen when two contexts are rendering using
214 * the same buffer object from multiple threads.
215 */
216 _mesa_debug(ctx, "duplicate entry in minmax cache\n");
217 free(entry);
218 goto out;
219 }
220
221 table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
222 hash, &entry->key, entry);
223 if (!table_entry)
224 free(entry);
225
226 out:
227 simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
228 }
229
230
231 void
vbo_get_minmax_index_mapped(unsigned count,unsigned index_size,unsigned restartIndex,bool restart,const void * indices,unsigned * min_index,unsigned * max_index)232 vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
233 unsigned restartIndex, bool restart,
234 const void *indices,
235 unsigned *min_index, unsigned *max_index)
236 {
237 switch (index_size) {
238 case 4: {
239 const GLuint *ui_indices = (const GLuint *)indices;
240 GLuint max_ui = 0;
241 GLuint min_ui = ~0U;
242 if (restart) {
243 for (unsigned i = 0; i < count; i++) {
244 if (ui_indices[i] != restartIndex) {
245 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
246 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
247 }
248 }
249 }
250 else {
251 #if defined(USE_SSE41)
252 if (cpu_has_sse4_1) {
253 _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
254 }
255 else
256 #endif
257 for (unsigned i = 0; i < count; i++) {
258 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
259 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
260 }
261 }
262 *min_index = min_ui;
263 *max_index = max_ui;
264 break;
265 }
266 case 2: {
267 const GLushort *us_indices = (const GLushort *)indices;
268 GLuint max_us = 0;
269 GLuint min_us = ~0U;
270 if (restart) {
271 for (unsigned i = 0; i < count; i++) {
272 if (us_indices[i] != restartIndex) {
273 if (us_indices[i] > max_us) max_us = us_indices[i];
274 if (us_indices[i] < min_us) min_us = us_indices[i];
275 }
276 }
277 }
278 else {
279 for (unsigned i = 0; i < count; i++) {
280 if (us_indices[i] > max_us) max_us = us_indices[i];
281 if (us_indices[i] < min_us) min_us = us_indices[i];
282 }
283 }
284 *min_index = min_us;
285 *max_index = max_us;
286 break;
287 }
288 case 1: {
289 const GLubyte *ub_indices = (const GLubyte *)indices;
290 GLuint max_ub = 0;
291 GLuint min_ub = ~0U;
292 if (restart) {
293 for (unsigned i = 0; i < count; i++) {
294 if (ub_indices[i] != restartIndex) {
295 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
296 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
297 }
298 }
299 }
300 else {
301 for (unsigned i = 0; i < count; i++) {
302 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
303 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
304 }
305 }
306 *min_index = min_ub;
307 *max_index = max_ub;
308 break;
309 }
310 default:
311 unreachable("not reached");
312 }
313 }
314
315
316 /**
317 * Compute min and max elements by scanning the index buffer for
318 * glDraw[Range]Elements() calls.
319 * If primitive restart is enabled, we need to ignore restart
320 * indexes when computing min/max.
321 */
322 static void
vbo_get_minmax_index(struct gl_context * ctx,const struct _mesa_prim * prim,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,const GLuint count)323 vbo_get_minmax_index(struct gl_context *ctx,
324 const struct _mesa_prim *prim,
325 const struct _mesa_index_buffer *ib,
326 GLuint *min_index, GLuint *max_index,
327 const GLuint count)
328 {
329 const GLboolean restart = ctx->Array._PrimitiveRestart;
330 const GLuint restartIndex =
331 ctx->Array._RestartIndex[(1 << ib->index_size_shift) - 1];
332 const char *indices;
333 GLintptr offset = 0;
334
335 indices = (char *) ib->ptr + (prim->start << ib->index_size_shift);
336 if (ib->obj) {
337 GLsizeiptr size = MIN2(count << ib->index_size_shift, ib->obj->Size);
338
339 if (vbo_get_minmax_cached(ib->obj, 1 << ib->index_size_shift, (GLintptr) indices,
340 count, min_index, max_index))
341 return;
342
343 offset = (GLintptr) indices;
344 indices = ctx->Driver.MapBufferRange(ctx, offset, size,
345 GL_MAP_READ_BIT, ib->obj,
346 MAP_INTERNAL);
347 }
348
349 vbo_get_minmax_index_mapped(count, 1 << ib->index_size_shift, restartIndex,
350 restart, indices, min_index, max_index);
351
352 if (ib->obj) {
353 vbo_minmax_cache_store(ctx, ib->obj, 1 << ib->index_size_shift, offset,
354 count, *min_index, *max_index);
355 ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
356 }
357 }
358
359 /**
360 * Compute min and max elements for nr_prims
361 */
362 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims)363 vbo_get_minmax_indices(struct gl_context *ctx,
364 const struct _mesa_prim *prims,
365 const struct _mesa_index_buffer *ib,
366 GLuint *min_index,
367 GLuint *max_index,
368 GLuint nr_prims)
369 {
370 GLuint tmp_min, tmp_max;
371 GLuint i;
372 GLuint count;
373
374 *min_index = ~0;
375 *max_index = 0;
376
377 for (i = 0; i < nr_prims; i++) {
378 const struct _mesa_prim *start_prim;
379
380 start_prim = &prims[i];
381 count = start_prim->count;
382 /* Do combination if possible to reduce map/unmap count */
383 while ((i + 1 < nr_prims) &&
384 (prims[i].start + prims[i].count == prims[i+1].start)) {
385 count += prims[i+1].count;
386 i++;
387 }
388 vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
389 *min_index = MIN2(*min_index, tmp_min);
390 *max_index = MAX2(*max_index, tmp_max);
391 }
392 }
393