1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/glthread_marshal.h"
25 #include "main/dispatch.h"
26 #include "main/bufferobj.h"
27
28 /**
29 * Create an upload buffer. This is called from the app thread, so everything
30 * has to be thread-safe in the driver.
31 */
32 static struct gl_buffer_object *
new_upload_buffer(struct gl_context * ctx,GLsizeiptr size,uint8_t ** ptr)33 new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
34 {
35 assert(ctx->GLThread.SupportsBufferUploads);
36
37 struct gl_buffer_object *obj =
38 _mesa_bufferobj_alloc(ctx, -1);
39 if (!obj)
40 return NULL;
41
42 obj->Immutable = true;
43
44 if (!_mesa_bufferobj_data(ctx, GL_ARRAY_BUFFER, size, NULL,
45 GL_WRITE_ONLY,
46 GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
47 obj)) {
48 _mesa_delete_buffer_object(ctx, obj);
49 return NULL;
50 }
51
52 *ptr = _mesa_bufferobj_map_range(ctx, 0, size,
53 GL_MAP_WRITE_BIT |
54 GL_MAP_UNSYNCHRONIZED_BIT |
55 MESA_MAP_THREAD_SAFE_BIT,
56 obj, MAP_GLTHREAD);
57 if (!*ptr) {
58 _mesa_delete_buffer_object(ctx, obj);
59 return NULL;
60 }
61
62 return obj;
63 }
64
65 void
_mesa_glthread_upload(struct gl_context * ctx,const void * data,GLsizeiptr size,unsigned * out_offset,struct gl_buffer_object ** out_buffer,uint8_t ** out_ptr)66 _mesa_glthread_upload(struct gl_context *ctx, const void *data,
67 GLsizeiptr size, unsigned *out_offset,
68 struct gl_buffer_object **out_buffer,
69 uint8_t **out_ptr)
70 {
71 struct glthread_state *glthread = &ctx->GLThread;
72 const unsigned default_size = 1024 * 1024;
73
74 if (unlikely(size > INT_MAX))
75 return;
76
77 /* The alignment was chosen arbitrarily. */
78 unsigned offset = align(glthread->upload_offset, 8);
79
80 /* Allocate a new buffer if needed. */
81 if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
82 /* If the size is greater than the buffer size, allocate a separate buffer
83 * just for this upload.
84 */
85 if (unlikely(size > default_size)) {
86 uint8_t *ptr;
87
88 assert(*out_buffer == NULL);
89 *out_buffer = new_upload_buffer(ctx, size, &ptr);
90 if (!*out_buffer)
91 return;
92
93 *out_offset = 0;
94 if (data)
95 memcpy(ptr, data, size);
96 else
97 *out_ptr = ptr;
98 return;
99 }
100
101 if (glthread->upload_buffer_private_refcount > 0) {
102 p_atomic_add(&glthread->upload_buffer->RefCount,
103 -glthread->upload_buffer_private_refcount);
104 glthread->upload_buffer_private_refcount = 0;
105 }
106 _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
107 glthread->upload_buffer =
108 new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
109 glthread->upload_offset = 0;
110 offset = 0;
111
112 /* Since atomic operations are very very slow when 2 threads are not
113 * sharing one L3 cache (which can happen on AMD Zen), prevent using
114 * atomics as follows:
115 *
116 * This function has to return a buffer reference to the caller.
117 * Instead of atomic_inc for every call, it does all possible future
118 * increments in advance when the upload buffer is allocated.
119 * The maximum number of times the function can be called per upload
120 * buffer is default_size, because the minimum allocation size is 1.
121 * Therefore the function can only return default_size number of
122 * references at most, so we will never need more. This is the number
123 * that is added to RefCount at allocation.
124 *
125 * upload_buffer_private_refcount tracks how many buffer references
126 * are left to return to callers. If the buffer is full and there are
127 * still references left, they are atomically subtracted from RefCount
128 * before the buffer is unreferenced.
129 *
130 * This can increase performance by 20%.
131 */
132 glthread->upload_buffer->RefCount += default_size;
133 glthread->upload_buffer_private_refcount = default_size;
134 }
135
136 /* Upload data. */
137 if (data)
138 memcpy(glthread->upload_ptr + offset, data, size);
139 else
140 *out_ptr = glthread->upload_ptr + offset;
141
142 glthread->upload_offset = offset + size;
143 *out_offset = offset;
144
145 assert(*out_buffer == NULL);
146 assert(glthread->upload_buffer_private_refcount > 0);
147 *out_buffer = glthread->upload_buffer;
148 glthread->upload_buffer_private_refcount--;
149 }
150
151 /** Tracks the current bindings for the vertex array and index array buffers.
152 *
153 * This is part of what we need to enable glthread on compat-GL contexts that
154 * happen to use VBOs, without also supporting the full tracking of VBO vs
155 * user vertex array bindings per attribute on each vertex array for
156 * determining what to upload at draw call time.
157 *
158 * Note that GL core makes it so that a buffer binding with an invalid handle
159 * in the "buffer" parameter will throw an error, and then a
160 * glVertexAttribPointer() that followsmight not end up pointing at a VBO.
161 * However, in GL core the draw call would throw an error as well, so we don't
162 * really care if our tracking is wrong for this case -- we never need to
163 * marshal user data for draw calls, and the unmarshal will just generate an
164 * error or not as appropriate.
165 *
166 * For compatibility GL, we do need to accurately know whether the draw call
167 * on the unmarshal side will dereference a user pointer or load data from a
168 * VBO per vertex. That would make it seem like we need to track whether a
169 * "buffer" is valid, so that we can know when an error will be generated
170 * instead of updating the binding. However, compat GL has the ridiculous
171 * feature that if you pass a bad name, it just gens a buffer object for you,
172 * so we escape without having to know if things are valid or not.
173 */
174 void
_mesa_glthread_BindBuffer(struct gl_context * ctx,GLenum target,GLuint buffer)175 _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
176 {
177 struct glthread_state *glthread = &ctx->GLThread;
178
179 switch (target) {
180 case GL_ARRAY_BUFFER:
181 glthread->CurrentArrayBufferName = buffer;
182 break;
183 case GL_ELEMENT_ARRAY_BUFFER:
184 /* The current element array buffer binding is actually tracked in the
185 * vertex array object instead of the context, so this would need to
186 * change on vertex array object updates.
187 */
188 glthread->CurrentVAO->CurrentElementBufferName = buffer;
189 break;
190 case GL_DRAW_INDIRECT_BUFFER:
191 glthread->CurrentDrawIndirectBufferName = buffer;
192 break;
193 case GL_PIXEL_PACK_BUFFER:
194 glthread->CurrentPixelPackBufferName = buffer;
195 break;
196 case GL_PIXEL_UNPACK_BUFFER:
197 glthread->CurrentPixelUnpackBufferName = buffer;
198 break;
199 case GL_QUERY_BUFFER:
200 glthread->CurrentQueryBufferName = buffer;
201 break;
202 }
203 }
204
205 void
_mesa_glthread_DeleteBuffers(struct gl_context * ctx,GLsizei n,const GLuint * buffers)206 _mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
207 const GLuint *buffers)
208 {
209 struct glthread_state *glthread = &ctx->GLThread;
210
211 if (!buffers || n < 0)
212 return;
213
214 for (unsigned i = 0; i < n; i++) {
215 GLuint id = buffers[i];
216
217 if (id == glthread->CurrentArrayBufferName)
218 _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
219 if (id == glthread->CurrentVAO->CurrentElementBufferName)
220 _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
221 if (id == glthread->CurrentDrawIndirectBufferName)
222 _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
223 if (id == glthread->CurrentPixelPackBufferName)
224 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0);
225 if (id == glthread->CurrentPixelUnpackBufferName)
226 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0);
227 }
228 }
229
230 /* BufferData: marshalled asynchronously */
231 struct marshal_cmd_BufferData
232 {
233 struct marshal_cmd_base cmd_base;
234 GLuint target_or_name;
235 GLsizeiptr size;
236 GLenum usage;
237 const GLvoid *data_external_mem;
238 bool data_null; /* If set, no data follows for "data" */
239 bool named;
240 bool ext_dsa;
241 /* Next size bytes are GLubyte data[size] */
242 };
243
244 uint32_t
_mesa_unmarshal_BufferData(struct gl_context * ctx,const struct marshal_cmd_BufferData * cmd,const uint64_t * last)245 _mesa_unmarshal_BufferData(struct gl_context *ctx,
246 const struct marshal_cmd_BufferData *cmd,
247 const uint64_t *last)
248 {
249 const GLuint target_or_name = cmd->target_or_name;
250 const GLsizei size = cmd->size;
251 const GLenum usage = cmd->usage;
252 const void *data;
253
254 if (cmd->data_null)
255 data = NULL;
256 else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
257 data = cmd->data_external_mem;
258 else
259 data = (const void *) (cmd + 1);
260
261 if (cmd->ext_dsa) {
262 CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
263 (target_or_name, size, data, usage));
264 } else if (cmd->named) {
265 CALL_NamedBufferData(ctx->CurrentServerDispatch,
266 (target_or_name, size, data, usage));
267 } else {
268 CALL_BufferData(ctx->CurrentServerDispatch,
269 (target_or_name, size, data, usage));
270 }
271 return cmd->cmd_base.cmd_size;
272 }
273
/**
 * Placeholder unmarshal entry point for NamedBufferData.
 *
 * NamedBufferData calls are queued as DISPATCH_CMD_BufferData and executed by
 * _mesa_unmarshal_BufferData(), so this function is never expected to run.
 */
uint32_t
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd,
                                const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
282
/**
 * Placeholder unmarshal entry point for NamedBufferDataEXT.
 *
 * NamedBufferDataEXT calls are queued as DISPATCH_CMD_BufferData and executed
 * by _mesa_unmarshal_BufferData(), so this function is never expected to run.
 */
uint32_t
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
291
292 static void
_mesa_marshal_BufferData_merged(GLuint target_or_name,GLsizeiptr size,const GLvoid * data,GLenum usage,bool named,bool ext_dsa,const char * func)293 _mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
294 const GLvoid *data, GLenum usage, bool named,
295 bool ext_dsa, const char *func)
296 {
297 GET_CURRENT_CONTEXT(ctx);
298 bool external_mem = !named &&
299 target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
300 bool copy_data = data && !external_mem;
301 size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);
302
303 if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE ||
304 (named && target_or_name == 0))) {
305 _mesa_glthread_finish_before(ctx, func);
306 if (named) {
307 CALL_NamedBufferData(ctx->CurrentServerDispatch,
308 (target_or_name, size, data, usage));
309 } else {
310 CALL_BufferData(ctx->CurrentServerDispatch,
311 (target_or_name, size, data, usage));
312 }
313 return;
314 }
315
316 struct marshal_cmd_BufferData *cmd =
317 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
318 cmd_size);
319
320 cmd->target_or_name = target_or_name;
321 cmd->size = size;
322 cmd->usage = usage;
323 cmd->data_null = !data;
324 cmd->named = named;
325 cmd->ext_dsa = ext_dsa;
326 cmd->data_external_mem = data;
327
328 if (copy_data) {
329 char *variable_data = (char *) (cmd + 1);
330 memcpy(variable_data, data, size);
331 }
332 }
333
334 void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target,GLsizeiptr size,const GLvoid * data,GLenum usage)335 _mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data,
336 GLenum usage)
337 {
338 _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
339 "BufferData");
340 }
341
342 void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer,GLsizeiptr size,const GLvoid * data,GLenum usage)343 _mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
344 const GLvoid * data, GLenum usage)
345 {
346 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
347 "NamedBufferData");
348 }
349
350 void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer,GLsizeiptr size,const GLvoid * data,GLenum usage)351 _mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
352 const GLvoid *data, GLenum usage)
353 {
354 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
355 "NamedBufferDataEXT");
356 }
357
358
359 /* BufferSubData: marshalled asynchronously */
360 struct marshal_cmd_BufferSubData
361 {
362 struct marshal_cmd_base cmd_base;
363 GLenum target_or_name;
364 GLintptr offset;
365 GLsizeiptr size;
366 bool named;
367 bool ext_dsa;
368 /* Next size bytes are GLubyte data[size] */
369 };
370
371 uint32_t
_mesa_unmarshal_BufferSubData(struct gl_context * ctx,const struct marshal_cmd_BufferSubData * cmd,const uint64_t * last)372 _mesa_unmarshal_BufferSubData(struct gl_context *ctx,
373 const struct marshal_cmd_BufferSubData *cmd,
374 const uint64_t *last)
375 {
376 const GLenum target_or_name = cmd->target_or_name;
377 const GLintptr offset = cmd->offset;
378 const GLsizeiptr size = cmd->size;
379 const void *data = (const void *) (cmd + 1);
380
381 if (cmd->ext_dsa) {
382 CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
383 (target_or_name, offset, size, data));
384 } else if (cmd->named) {
385 CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
386 (target_or_name, offset, size, data));
387 } else {
388 CALL_BufferSubData(ctx->CurrentServerDispatch,
389 (target_or_name, offset, size, data));
390 }
391 return cmd->cmd_base.cmd_size;
392 }
393
/**
 * Placeholder unmarshal entry point for NamedBufferSubData.
 *
 * NamedBufferSubData calls are queued as DISPATCH_CMD_BufferSubData and
 * executed by _mesa_unmarshal_BufferSubData(), so this function is never
 * expected to run.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
402
/**
 * Placeholder unmarshal entry point for NamedBufferSubDataEXT.
 *
 * NamedBufferSubDataEXT calls are queued as DISPATCH_CMD_BufferSubData and
 * executed by _mesa_unmarshal_BufferSubData(), so this function is never
 * expected to run.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
                                      const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
411
412 static void
_mesa_marshal_BufferSubData_merged(GLuint target_or_name,GLintptr offset,GLsizeiptr size,const GLvoid * data,bool named,bool ext_dsa,const char * func)413 _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
414 GLsizeiptr size, const GLvoid *data,
415 bool named, bool ext_dsa, const char *func)
416 {
417 GET_CURRENT_CONTEXT(ctx);
418 size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;
419
420 /* Fast path: Copy the data to an upload buffer, and use the GPU
421 * to copy the uploaded data to the destination buffer.
422 */
423 /* TODO: Handle offset == 0 && size < buffer_size.
424 * If offset == 0 and size == buffer_size, it's better to discard
425 * the buffer storage, but we don't know the buffer size in glthread.
426 */
427 if (ctx->GLThread.SupportsBufferUploads &&
428 data && offset > 0 && size > 0) {
429 struct gl_buffer_object *upload_buffer = NULL;
430 unsigned upload_offset = 0;
431
432 _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
433 NULL);
434
435 if (upload_buffer) {
436 _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
437 upload_offset,
438 target_or_name,
439 offset, size, named,
440 ext_dsa);
441 return;
442 }
443 }
444
445 if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
446 cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
447 (named && target_or_name == 0))) {
448 _mesa_glthread_finish_before(ctx, func);
449 if (named) {
450 CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
451 (target_or_name, offset, size, data));
452 } else {
453 CALL_BufferSubData(ctx->CurrentServerDispatch,
454 (target_or_name, offset, size, data));
455 }
456 return;
457 }
458
459 struct marshal_cmd_BufferSubData *cmd =
460 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
461 cmd_size);
462 cmd->target_or_name = target_or_name;
463 cmd->offset = offset;
464 cmd->size = size;
465 cmd->named = named;
466 cmd->ext_dsa = ext_dsa;
467
468 char *variable_data = (char *) (cmd + 1);
469 memcpy(variable_data, data, size);
470 }
471
472 void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target,GLintptr offset,GLsizeiptr size,const GLvoid * data)473 _mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
474 const GLvoid * data)
475 {
476 _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
477 false, "BufferSubData");
478 }
479
480 void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer,GLintptr offset,GLsizeiptr size,const GLvoid * data)481 _mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
482 GLsizeiptr size, const GLvoid * data)
483 {
484 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
485 false, "NamedBufferSubData");
486 }
487
488 void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer,GLintptr offset,GLsizeiptr size,const GLvoid * data)489 _mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
490 GLsizeiptr size, const GLvoid * data)
491 {
492 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
493 true, "NamedBufferSubDataEXT");
494 }
495