/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/glthread_marshal.h"
#include "main/dispatch.h"
#include "main/bufferobj.h"

/**
 * Create an upload buffer. This is called from the app thread, so everything
 * has to be thread-safe in the driver.
 */
static struct gl_buffer_object *
new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
{
   assert(ctx->GLThread.SupportsBufferUploads);

   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
   if (!obj)
      return NULL;

   obj->Immutable = true;

   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
                               GL_WRITE_ONLY,
                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
                               obj)) {
      ctx->Driver.DeleteBuffer(ctx, obj);
      return NULL;
   }

   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
                                     GL_MAP_WRITE_BIT |
                                     GL_MAP_UNSYNCHRONIZED_BIT |
                                     MESA_MAP_THREAD_SAFE_BIT,
                                     obj, MAP_GLTHREAD);
   if (!*ptr) {
      ctx->Driver.DeleteBuffer(ctx, obj);
      return NULL;
   }

   return obj;
}

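/**
 * Allocate and fill a suballocation from glthread's upload buffer.
 *
 * "data" (if non-NULL) is copied into a suballocation of "size" bytes from
 * the current 1 MB upload buffer, or into a dedicated buffer if it doesn't
 * fit. On success, *out_buffer and *out_offset receive a buffer reference
 * and the offset of the suballocation; *out_buffer must be NULL on entry.
 * If "data" is NULL, *out_ptr instead receives a mapped pointer for the
 * caller to write into. On failure, *out_buffer is left NULL.
 */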
void
_mesa_glthread_upload(struct gl_context *ctx, const void *data,
                      GLsizeiptr size, unsigned *out_offset,
                      struct gl_buffer_object **out_buffer,
                      uint8_t **out_ptr)
{
   struct glthread_state *glthread = &ctx->GLThread;
   const unsigned default_size = 1024 * 1024;

   if (unlikely(size > INT_MAX))
      return;

   /* The alignment was chosen arbitrarily. */
   unsigned offset = align(glthread->upload_offset, 8);

   /* Allocate a new buffer if needed. */
   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
      /* If the size is greater than the buffer size, allocate a separate
       * buffer just for this upload.
       */
      if (unlikely(size > default_size)) {
         uint8_t *ptr;

         assert(*out_buffer == NULL);
         *out_buffer = new_upload_buffer(ctx, size, &ptr);
         if (!*out_buffer)
            return;

         *out_offset = 0;
         if (data)
            memcpy(ptr, data, size);
         else
            *out_ptr = ptr;
         return;
      }

      if (glthread->upload_buffer_private_refcount > 0) {
         p_atomic_add(&glthread->upload_buffer->RefCount,
                      -glthread->upload_buffer_private_refcount);
         glthread->upload_buffer_private_refcount = 0;
      }
      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
      glthread->upload_buffer =
         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
      glthread->upload_offset = 0;
      offset = 0;

      /* Since atomic operations are very slow when 2 threads are not
       * sharing the same L3 cache (which can happen on AMD Zen), prevent
       * using atomics as follows:
       *
       * This function has to return a buffer reference to the caller.
       * Instead of atomic_inc for every call, it does all possible future
       * increments in advance when the upload buffer is allocated.
       * The maximum number of times the function can be called per upload
       * buffer is default_size, because the minimum allocation size is 1.
       * Therefore the function can return at most default_size references,
       * so we will never need more. This is the number that is added to
       * RefCount at allocation.
       *
       * upload_buffer_private_refcount tracks how many buffer references
       * are left to return to callers. If the buffer is full and there are
       * still references left, they are atomically subtracted from RefCount
       * before the buffer is unreferenced.
       *
       * This can increase performance by 20%.
       */
      glthread->upload_buffer->RefCount += default_size;
      glthread->upload_buffer_private_refcount = default_size;
   }

   /* Upload data. */
   if (data)
      memcpy(glthread->upload_ptr + offset, data, size);
   else
      *out_ptr = glthread->upload_ptr + offset;

   glthread->upload_offset = offset + size;
   *out_offset = offset;

   assert(*out_buffer == NULL);
   assert(glthread->upload_buffer_private_refcount > 0);
   *out_buffer = glthread->upload_buffer;
   glthread->upload_buffer_private_refcount--;
}

/** Tracks the current bindings for the vertex array and index array buffers.
 *
 * This is part of what we need to enable glthread on compat-GL contexts that
 * happen to use VBOs, without also supporting the full tracking of VBO vs
 * user vertex array bindings per attribute on each vertex array for
 * determining what to upload at draw call time.
 *
 * Note that GL core makes it so that a buffer binding with an invalid handle
 * in the "buffer" parameter will throw an error, and then a
 * glVertexAttribPointer() that follows might not end up pointing at a VBO.
 * However, in GL core the draw call would throw an error as well, so we don't
 * really care if our tracking is wrong for this case -- we never need to
 * marshal user data for draw calls, and the unmarshal will just generate an
 * error or not as appropriate.
 *
 * For compatibility GL, we do need to accurately know whether the draw call
 * on the unmarshal side will dereference a user pointer or load data from a
 * VBO per vertex. That would make it seem like we need to track whether a
 * "buffer" is valid, so that we can know when an error will be generated
 * instead of updating the binding. However, compat GL has the ridiculous
 * feature that if you pass a bad name, it just gens a buffer object for you,
 * so we escape without having to know if things are valid or not.
 */
void
_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
{
   struct glthread_state *glthread = &ctx->GLThread;

   switch (target) {
   case GL_ARRAY_BUFFER:
      glthread->CurrentArrayBufferName = buffer;
      break;
   case GL_ELEMENT_ARRAY_BUFFER:
      /* The current element array buffer binding is actually tracked in the
       * vertex array object instead of the context, so this would need to
       * change on vertex array object updates.
       */
      glthread->CurrentVAO->CurrentElementBufferName = buffer;
      break;
   case GL_DRAW_INDIRECT_BUFFER:
      glthread->CurrentDrawIndirectBufferName = buffer;
      break;
   case GL_PIXEL_PACK_BUFFER:
      glthread->CurrentPixelPackBufferName = buffer;
      break;
   case GL_PIXEL_UNPACK_BUFFER:
      glthread->CurrentPixelUnpackBufferName = buffer;
      break;
   }
}

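/**
 * Deleting a buffer implicitly unbinds it, so keep glthread's shadow
 * bindings in sync by resetting any tracked binding that refers to one of
 * the deleted names back to 0.
 */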
void
_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
                             const GLuint *buffers)
{
   struct glthread_state *glthread = &ctx->GLThread;

   if (!buffers)
      return;

   for (unsigned i = 0; i < n; i++) {
      GLuint id = buffers[i];

      if (id == glthread->CurrentArrayBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentVAO->CurrentElementBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentDrawIndirectBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
      if (id == glthread->CurrentPixelPackBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0);
      if (id == glthread->CurrentPixelUnpackBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0);
   }
}

/* BufferData: marshalled asynchronously */
struct marshal_cmd_BufferData
{
   struct marshal_cmd_base cmd_base;
   GLuint target_or_name;
   GLsizeiptr size;
   GLenum usage;
   const GLvoid *data_external_mem;
   bool data_null; /* If set, no data follows for "data" */
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

uint32_t
_mesa_unmarshal_BufferData(struct gl_context *ctx,
                           const struct marshal_cmd_BufferData *cmd,
                           const uint64_t *last)
{
   const GLuint target_or_name = cmd->target_or_name;
   const GLsizei size = cmd->size;
   const GLenum usage = cmd->usage;
   const void *data;

   if (cmd->data_null)
      data = NULL;
   else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
      data = cmd->data_external_mem;
   else
      data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
   } else if (cmd->named) {
      CALL_NamedBufferData(ctx->CurrentServerDispatch,
                           (target_or_name, size, data, usage));
   } else {
      CALL_BufferData(ctx->CurrentServerDispatch,
                      (target_or_name, size, data, usage));
   }
   return cmd->cmd_base.cmd_size;
}

uint32_t
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd,
                                const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}

uint32_t
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}

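/**
 * Common marshalling path for glBufferData, glNamedBufferData, and
 * glNamedBufferDataEXT. The data (if any) is copied into the command buffer
 * right after the command, except for GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD,
 * where only the pointer is recorded. Invalid sizes, payloads too large for
 * a marshal command, and buffer 0 with the named variants fall back to
 * synchronizing and doing the call directly.
 */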
static void
_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
                                const GLvoid *data, GLenum usage, bool named,
                                bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   bool external_mem = !named &&
                       target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
   bool copy_data = data && !external_mem;
   size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);

   if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferData(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
      } else {
         CALL_BufferData(ctx->CurrentServerDispatch,
                         (target_or_name, size, data, usage));
      }
      return;
   }

   struct marshal_cmd_BufferData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
                                      cmd_size);

   cmd->target_or_name = target_or_name;
   cmd->size = size;
   cmd->usage = usage;
   cmd->data_null = !data;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;
   cmd->data_external_mem = data;

   if (copy_data) {
      char *variable_data = (char *) (cmd + 1);
      memcpy(variable_data, data, size);
   }
}

void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid *data,
                         GLenum usage)
{
   _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
                                   "BufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
                              const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
                                   "NamedBufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
                                 const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
                                   "NamedBufferDataEXT");
}

/* BufferSubData: marshalled asynchronously */
struct marshal_cmd_BufferSubData
{
   struct marshal_cmd_base cmd_base;
   GLenum target_or_name;
   GLintptr offset;
   GLsizeiptr size;
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

uint32_t
_mesa_unmarshal_BufferSubData(struct gl_context *ctx,
                              const struct marshal_cmd_BufferSubData *cmd,
                              const uint64_t *last)
{
   const GLenum target_or_name = cmd->target_or_name;
   const GLintptr offset = cmd->offset;
   const GLsizeiptr size = cmd->size;
   const void *data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
   } else if (cmd->named) {
      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                              (target_or_name, offset, size, data));
   } else {
      CALL_BufferSubData(ctx->CurrentServerDispatch,
                         (target_or_name, offset, size, data));
   }
   return cmd->cmd_base.cmd_size;
}

uint32_t
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}

uint32_t
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
                                      const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}

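/**
 * Common marshalling path for glBufferSubData, glNamedBufferSubData, and
 * glNamedBufferSubDataEXT. If buffer uploads are supported and the update
 * qualifies, the data is copied into a glthread upload buffer and the update
 * becomes a GPU buffer-to-buffer copy via InternalBufferSubDataCopyMESA.
 * Otherwise the data is copied into the command buffer, or the call is
 * synchronized for the cases that can't be marshalled asynchronously.
 */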
static void
_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
                                   GLsizeiptr size, const GLvoid *data,
                                   bool named, bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;

   /* Fast path: Copy the data to an upload buffer, and use the GPU
    * to copy the uploaded data to the destination buffer.
    */
   /* TODO: Handle offset == 0 && size < buffer_size.
    * If offset == 0 and size == buffer_size, it's better to discard
    * the buffer storage, but we don't know the buffer size in glthread.
    */
   if (ctx->GLThread.SupportsBufferUploads &&
       data && offset > 0 && size > 0) {
      struct gl_buffer_object *upload_buffer = NULL;
      unsigned upload_offset = 0;

      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
                            NULL);

      if (upload_buffer) {
         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
                                                     upload_offset,
                                                     target_or_name,
                                                     offset, size, named,
                                                     ext_dsa);
         return;
      }
   }

   if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
                cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
      } else {
         CALL_BufferSubData(ctx->CurrentServerDispatch,
                            (target_or_name, offset, size, data));
      }
      return;
   }

   struct marshal_cmd_BufferSubData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
                                      cmd_size);
   cmd->target_or_name = target_or_name;
   cmd->offset = offset;
   cmd->size = size;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;

   char *variable_data = (char *) (cmd + 1);
   memcpy(variable_data, data, size);
}

void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                            const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
                                      false, "BufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
                                 GLsizeiptr size, const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      false, "NamedBufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
                                    GLsizeiptr size, const GLvoid *data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      true, "NamedBufferSubDataEXT");
}