1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36
37
38 struct minmax_cache_key {
39 GLintptr offset;
40 GLuint count;
41 unsigned index_size;
42 };
43
44
45 struct minmax_cache_entry {
46 struct minmax_cache_key key;
47 GLuint min;
48 GLuint max;
49 };
50
51
52 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)53 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
54 {
55 return _mesa_hash_data(key, sizeof(*key));
56 }
57
58
59 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)60 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
61 const struct minmax_cache_key *b)
62 {
63 return (a->offset == b->offset) && (a->count == b->count) &&
64 (a->index_size == b->index_size);
65 }
66
67
68 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)69 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
70 {
71 free(entry->data);
72 }
73
74
75 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)76 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
77 {
78 if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
79 USAGE_ATOMIC_COUNTER_BUFFER |
80 USAGE_SHADER_STORAGE_BUFFER |
81 USAGE_TRANSFORM_FEEDBACK_BUFFER |
82 USAGE_PIXEL_PACK_BUFFER |
83 USAGE_DISABLE_MINMAX_CACHE))
84 return GL_FALSE;
85
86 if ((bufferObj->Mappings[MAP_USER].AccessFlags &
87 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
88 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
89 return GL_FALSE;
90
91 return GL_TRUE;
92 }
93
94
95 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)96 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
97 {
98 _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
99 bufferObj->MinMaxCache = NULL;
100 }
101
102
103 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)104 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
105 unsigned index_size, GLintptr offset, GLuint count,
106 GLuint *min_index, GLuint *max_index)
107 {
108 GLboolean found = GL_FALSE;
109 struct minmax_cache_key key;
110 uint32_t hash;
111 struct hash_entry *result;
112
113 if (!bufferObj->MinMaxCache)
114 return GL_FALSE;
115 if (!vbo_use_minmax_cache(bufferObj))
116 return GL_FALSE;
117
118 simple_mtx_lock(&bufferObj->Mutex);
119
120 if (bufferObj->MinMaxCacheDirty) {
121 /* Disable the cache permanently for this BO if the number of hits
122 * is asymptotically less than the number of misses. This happens when
123 * applications use the BO for streaming.
124 *
125 * However, some initial optimism allows applications that interleave
126 * draw calls with glBufferSubData during warmup.
127 */
128 unsigned optimism = bufferObj->Size;
129 if (bufferObj->MinMaxCacheMissIndices > optimism &&
130 bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
131 bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
132 vbo_delete_minmax_cache(bufferObj);
133 goto out_disable;
134 }
135
136 _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
137 bufferObj->MinMaxCacheDirty = false;
138 goto out_invalidate;
139 }
140
141 key.index_size = index_size;
142 key.offset = offset;
143 key.count = count;
144 hash = vbo_minmax_cache_hash(&key);
145 result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
146 if (result) {
147 struct minmax_cache_entry *entry = result->data;
148 *min_index = entry->min;
149 *max_index = entry->max;
150 found = GL_TRUE;
151 }
152
153 out_invalidate:
154 if (found) {
155 /* The hit counter saturates so that we don't accidently disable the
156 * cache in a long-running program.
157 */
158 unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
159
160 if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
161 bufferObj->MinMaxCacheHitIndices = new_hit_count;
162 else
163 bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
164 } else {
165 bufferObj->MinMaxCacheMissIndices += count;
166 }
167
168 out_disable:
169 simple_mtx_unlock(&bufferObj->Mutex);
170 return found;
171 }
172
173
174 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint min,GLuint max)175 vbo_minmax_cache_store(struct gl_context *ctx,
176 struct gl_buffer_object *bufferObj,
177 unsigned index_size, GLintptr offset, GLuint count,
178 GLuint min, GLuint max)
179 {
180 struct minmax_cache_entry *entry;
181 struct hash_entry *table_entry;
182 uint32_t hash;
183
184 if (!vbo_use_minmax_cache(bufferObj))
185 return;
186
187 simple_mtx_lock(&bufferObj->Mutex);
188
189 if (!bufferObj->MinMaxCache) {
190 bufferObj->MinMaxCache =
191 _mesa_hash_table_create(NULL,
192 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
193 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
194 if (!bufferObj->MinMaxCache)
195 goto out;
196 }
197
198 entry = MALLOC_STRUCT(minmax_cache_entry);
199 if (!entry)
200 goto out;
201
202 entry->key.offset = offset;
203 entry->key.count = count;
204 entry->key.index_size = index_size;
205 entry->min = min;
206 entry->max = max;
207 hash = vbo_minmax_cache_hash(&entry->key);
208
209 table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
210 hash, &entry->key);
211 if (table_entry) {
212 /* It seems like this could happen when two contexts are rendering using
213 * the same buffer object from multiple threads.
214 */
215 _mesa_debug(ctx, "duplicate entry in minmax cache\n");
216 free(entry);
217 goto out;
218 }
219
220 table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
221 hash, &entry->key, entry);
222 if (!table_entry)
223 free(entry);
224
225 out:
226 simple_mtx_unlock(&bufferObj->Mutex);
227 }
228
229
230 /**
231 * Compute min and max elements by scanning the index buffer for
232 * glDraw[Range]Elements() calls.
233 * If primitive restart is enabled, we need to ignore restart
234 * indexes when computing min/max.
235 */
236 static void
vbo_get_minmax_index(struct gl_context * ctx,const struct _mesa_prim * prim,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,const GLuint count)237 vbo_get_minmax_index(struct gl_context *ctx,
238 const struct _mesa_prim *prim,
239 const struct _mesa_index_buffer *ib,
240 GLuint *min_index, GLuint *max_index,
241 const GLuint count)
242 {
243 const GLboolean restart = ctx->Array._PrimitiveRestart;
244 const GLuint restartIndex =
245 _mesa_primitive_restart_index(ctx, ib->index_size);
246 const char *indices;
247 GLuint i;
248 GLintptr offset = 0;
249
250 indices = (char *) ib->ptr + prim->start * ib->index_size;
251 if (_mesa_is_bufferobj(ib->obj)) {
252 GLsizeiptr size = MIN2(count * ib->index_size, ib->obj->Size);
253
254 if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices,
255 count, min_index, max_index))
256 return;
257
258 offset = (GLintptr) indices;
259 indices = ctx->Driver.MapBufferRange(ctx, offset, size,
260 GL_MAP_READ_BIT, ib->obj,
261 MAP_INTERNAL);
262 }
263
264 switch (ib->index_size) {
265 case 4: {
266 const GLuint *ui_indices = (const GLuint *)indices;
267 GLuint max_ui = 0;
268 GLuint min_ui = ~0U;
269 if (restart) {
270 for (i = 0; i < count; i++) {
271 if (ui_indices[i] != restartIndex) {
272 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
273 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
274 }
275 }
276 }
277 else {
278 #if defined(USE_SSE41)
279 if (cpu_has_sse4_1) {
280 _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
281 }
282 else
283 #endif
284 for (i = 0; i < count; i++) {
285 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
286 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
287 }
288 }
289 *min_index = min_ui;
290 *max_index = max_ui;
291 break;
292 }
293 case 2: {
294 const GLushort *us_indices = (const GLushort *)indices;
295 GLuint max_us = 0;
296 GLuint min_us = ~0U;
297 if (restart) {
298 for (i = 0; i < count; i++) {
299 if (us_indices[i] != restartIndex) {
300 if (us_indices[i] > max_us) max_us = us_indices[i];
301 if (us_indices[i] < min_us) min_us = us_indices[i];
302 }
303 }
304 }
305 else {
306 for (i = 0; i < count; i++) {
307 if (us_indices[i] > max_us) max_us = us_indices[i];
308 if (us_indices[i] < min_us) min_us = us_indices[i];
309 }
310 }
311 *min_index = min_us;
312 *max_index = max_us;
313 break;
314 }
315 case 1: {
316 const GLubyte *ub_indices = (const GLubyte *)indices;
317 GLuint max_ub = 0;
318 GLuint min_ub = ~0U;
319 if (restart) {
320 for (i = 0; i < count; i++) {
321 if (ub_indices[i] != restartIndex) {
322 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
323 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
324 }
325 }
326 }
327 else {
328 for (i = 0; i < count; i++) {
329 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
330 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
331 }
332 }
333 *min_index = min_ub;
334 *max_index = max_ub;
335 break;
336 }
337 default:
338 unreachable("not reached");
339 }
340
341 if (_mesa_is_bufferobj(ib->obj)) {
342 vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
343 count, *min_index, *max_index);
344 ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
345 }
346 }
347
348 /**
349 * Compute min and max elements for nr_prims
350 */
351 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims)352 vbo_get_minmax_indices(struct gl_context *ctx,
353 const struct _mesa_prim *prims,
354 const struct _mesa_index_buffer *ib,
355 GLuint *min_index,
356 GLuint *max_index,
357 GLuint nr_prims)
358 {
359 GLuint tmp_min, tmp_max;
360 GLuint i;
361 GLuint count;
362
363 *min_index = ~0;
364 *max_index = 0;
365
366 for (i = 0; i < nr_prims; i++) {
367 const struct _mesa_prim *start_prim;
368
369 start_prim = &prims[i];
370 count = start_prim->count;
371 /* Do combination if possible to reduce map/unmap count */
372 while ((i + 1 < nr_prims) &&
373 (prims[i].start + prims[i].count == prims[i+1].start)) {
374 count += prims[i+1].count;
375 i++;
376 }
377 vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
378 *min_index = MIN2(*min_index, tmp_min);
379 *max_index = MAX2(*max_index, tmp_max);
380 }
381 }
382